{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e1d35417-75f2-49fd-8695-36fb11a1dde3",
   "metadata": {},
   "source": [
    "# Creating the preprocessed CICIDS2017 dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b7ec7097-f388-4f9b-af64-a91b718e8a31",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f45361fb-b69f-4061-b4b0-945027fa054b",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_folder = \"./CICIDS/Raw\"\n",
    "output_file = \"./CICIDS/Raw/CICIDS2017_preprocessed.csv\"\n",
    "\n",
    "# One capture file per entry, listed in df1..df8 order. Each entry pairs the\n",
    "# file name (appended to label_folder) with the extra pd.read_csv keyword\n",
    "# arguments used for that file; only the Thursday-morning file has any.\n",
    "csv_sources = [\n",
    "    (\"/Monday-WorkingHours.pcap_ISCX.csv\", {}),\n",
    "    (\"/Tuesday-WorkingHours.pcap_ISCX.csv\", {}),\n",
    "    (\"/Wednesday-workingHours.pcap_ISCX.csv\", {}),\n",
    "    (\"/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv\", {}),\n",
    "    (\n",
    "        \"/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv\",\n",
    "        {\"encoding\": \"cp1252\", \"low_memory\": False},\n",
    "    ),\n",
    "    (\"/Friday-WorkingHours-Morning.pcap_ISCX.csv\", {}),\n",
    "    (\"/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv\", {}),\n",
    "    (\"/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv\", {}),\n",
    "]\n",
    "df1, df2, df3, df4, df5, df6, df7, df8 = [\n",
    "    pd.read_csv(label_folder + file_name, **read_options)\n",
    "    for file_name, read_options in csv_sources\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e6c141f0-8bc3-45f3-89c0-4ba25bd63604",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3119345, 85)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the eight per-file frames into a single frame with a fresh index.\n",
    "daily_frames = [df1, df2, df3, df4, df5, df6, df7, df8]\n",
    "df = pd.concat(daily_frames, ignore_index=True)\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "29045af0-1029-4039-89f9-c24dc94e04e3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3119345, 84)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalise header names by trimming surrounding whitespace.\n",
    "df.columns = df.columns.str.strip()\n",
    "# \"Fwd Header Length.1\" is a duplicate column and is removed here.\n",
    "# errors=\"ignore\" keeps the cell re-runnable: on a second run the column is\n",
    "# already gone, and without it the drop would raise KeyError.\n",
    "df.drop(columns=[\"Fwd Header Length.1\"], inplace=True, errors=\"ignore\")\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "01379342-f238-493a-afae-7baef566b8a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2830743, 84)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Discard every row whose Flow ID is missing.\n",
    "missing_flow_id = df[\"Flow ID\"].isna()\n",
    "df = df.drop(index=df.index[missing_flow_id.to_numpy()])\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2df00682-b5b3-4f18-b4fb-13a1d79fc4ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2827876, 84)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Treat +/- infinity as missing, then drop every row with a missing value.\n",
    "infinite_values = [np.inf, -np.inf]\n",
    "df.replace(to_replace=infinite_values, value=np.nan, inplace=True)\n",
    "df.dropna(axis=0, how=\"any\", inplace=True)\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "076da2e4-b798-4fd5-8a31-42acc4924536",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2827677, 84)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove fully identical rows, keeping the first occurrence of each.\n",
    "df.drop_duplicates(subset=None, keep=\"first\", inplace=True)\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "7c02a23a-037c-4c04-9071-cadd115a95a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Flow ID</th>\n",
       "      <th>Source IP</th>\n",
       "      <th>Source Port</th>\n",
       "      <th>Destination IP</th>\n",
       "      <th>Destination Port</th>\n",
       "      <th>Protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>Label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>192.168.10.5-8.254.250.126-49188-80-6</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>192.168.10.5-8.254.250.126-49188-80-6</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>192.168.10.14-8.253.185.121-49486-80-6</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>192.168.10.14-8.253.185.121-49486-80-6</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>192.168.10.3-192.168.10.9-88-1031-6</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119340</th>\n",
       "      <td>192.168.10.15-72.21.91.29-61374-80-6</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119341</th>\n",
       "      <td>192.168.10.15-72.21.91.29-61378-80-6</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119342</th>\n",
       "      <td>192.168.10.15-72.21.91.29-61375-80-6</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119343</th>\n",
       "      <td>192.168.10.15-8.41.222.187-61323-80-6</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119344</th>\n",
       "      <td>192.168.10.15-8.43.72.21-61326-80-6</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        Flow ID      Source IP  Source Port  \\\n",
       "0         192.168.10.5-8.254.250.126-49188-80-6  8.254.250.126         80.0   \n",
       "1         192.168.10.5-8.254.250.126-49188-80-6  8.254.250.126         80.0   \n",
       "4        192.168.10.14-8.253.185.121-49486-80-6  8.253.185.121         80.0   \n",
       "5        192.168.10.14-8.253.185.121-49486-80-6  8.253.185.121         80.0   \n",
       "8           192.168.10.3-192.168.10.9-88-1031-6   192.168.10.9       1031.0   \n",
       "...                                         ...            ...          ...   \n",
       "3119340    192.168.10.15-72.21.91.29-61374-80-6    72.21.91.29         80.0   \n",
       "3119341    192.168.10.15-72.21.91.29-61378-80-6    72.21.91.29         80.0   \n",
       "3119342    192.168.10.15-72.21.91.29-61375-80-6    72.21.91.29         80.0   \n",
       "3119343   192.168.10.15-8.41.222.187-61323-80-6   8.41.222.187         80.0   \n",
       "3119344     192.168.10.15-8.43.72.21-61326-80-6     8.43.72.21         80.0   \n",
       "\n",
       "        Destination IP  Destination Port  Protocol            Timestamp  \\\n",
       "0         192.168.10.5           49188.0       6.0  03/07/2017 08:55:58   \n",
       "1         192.168.10.5           49188.0       6.0  03/07/2017 08:55:58   \n",
       "4        192.168.10.14           49486.0       6.0  03/07/2017 08:56:22   \n",
       "5        192.168.10.14           49486.0       6.0  03/07/2017 08:56:22   \n",
       "8         192.168.10.3              88.0       6.0  03/07/2017 08:56:38   \n",
       "...                ...               ...       ...                  ...   \n",
       "3119340  192.168.10.15           61374.0       6.0        7/7/2017 5:02   \n",
       "3119341  192.168.10.15           61378.0       6.0        7/7/2017 5:02   \n",
       "3119342  192.168.10.15           61375.0       6.0        7/7/2017 5:02   \n",
       "3119343  192.168.10.15           61323.0       6.0        7/7/2017 5:02   \n",
       "3119344  192.168.10.15           61326.0       6.0        7/7/2017 5:02   \n",
       "\n",
       "         Flow Duration  Total Fwd Packets  Total Backward Packets  ...  \\\n",
       "0                  4.0                2.0                     0.0  ...   \n",
       "1                  1.0                2.0                     0.0  ...   \n",
       "4                  3.0                2.0                     0.0  ...   \n",
       "5                  1.0                2.0                     0.0  ...   \n",
       "8                609.0                7.0                     4.0  ...   \n",
       "...                ...                ...                     ...  ...   \n",
       "3119340           61.0                1.0                     1.0  ...   \n",
       "3119341           72.0                1.0                     1.0  ...   \n",
       "3119342           75.0                1.0                     1.0  ...   \n",
       "3119343           48.0                2.0                     0.0  ...   \n",
       "3119344           68.0                1.0                     1.0  ...   \n",
       "\n",
       "         min_seg_size_forward  Active Mean  Active Std  Active Max  \\\n",
       "0                        20.0          0.0         0.0         0.0   \n",
       "1                        20.0          0.0         0.0         0.0   \n",
       "4                        20.0          0.0         0.0         0.0   \n",
       "5                        20.0          0.0         0.0         0.0   \n",
       "8                        20.0          0.0         0.0         0.0   \n",
       "...                       ...          ...         ...         ...   \n",
       "3119340                  20.0          0.0         0.0         0.0   \n",
       "3119341                  20.0          0.0         0.0         0.0   \n",
       "3119342                  20.0          0.0         0.0         0.0   \n",
       "3119343                  20.0          0.0         0.0         0.0   \n",
       "3119344                  20.0          0.0         0.0         0.0   \n",
       "\n",
       "         Active Min  Idle Mean  Idle Std  Idle Max  Idle Min   Label  \n",
       "0               0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "1               0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "4               0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "5               0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "8               0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "...             ...        ...       ...       ...       ...     ...  \n",
       "3119340         0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "3119341         0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "3119342         0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "3119343         0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "3119344         0.0        0.0       0.0       0.0       0.0  BENIGN  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the frame; pandas displays only the leading and trailing rows.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "37431d8e-80c5-43c4-bb45-4c30b739ff03",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the address, port, protocol and label columns snake_case names.\n",
    "column_aliases = {\n",
    "    \"Source IP\": \"source_ip\",\n",
    "    \"Destination IP\": \"destination_ip\",\n",
    "    \"Source Port\": \"source_port\",\n",
    "    \"Destination Port\": \"destination_port\",\n",
    "    \"Protocol\": \"protocol\",\n",
    "    \"Label\": \"attack_label\",\n",
    "}\n",
    "df.rename(columns=column_aliases, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "9b13b7b4-7488-4ef4-a0a3-35c68c437ceb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the Flow ID column from the frame in place.\n",
    "df.drop(columns=[\"Flow ID\"], inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "bcac94c8-2c92-44ab-acfd-a482019a5b77",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>Total Length of Fwd Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>484.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119340</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119341</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119342</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119343</th>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119344</th>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 83 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             source_ip  source_port destination_ip  destination_port  \\\n",
       "0        8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1        8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "4        8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "5        8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "8         192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...                ...          ...            ...               ...   \n",
       "3119340    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "3119341    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "3119342    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "3119343   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "3119344     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "         protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0             6.0  03/07/2017 08:55:58            4.0                2.0   \n",
       "1             6.0  03/07/2017 08:55:58            1.0                2.0   \n",
       "4             6.0  03/07/2017 08:56:22            3.0                2.0   \n",
       "5             6.0  03/07/2017 08:56:22            1.0                2.0   \n",
       "8             6.0  03/07/2017 08:56:38          609.0                7.0   \n",
       "...           ...                  ...            ...                ...   \n",
       "3119340       6.0        7/7/2017 5:02           61.0                1.0   \n",
       "3119341       6.0        7/7/2017 5:02           72.0                1.0   \n",
       "3119342       6.0        7/7/2017 5:02           75.0                1.0   \n",
       "3119343       6.0        7/7/2017 5:02           48.0                2.0   \n",
       "3119344       6.0        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  Total Length of Fwd Packets  ...  \\\n",
       "0                           0.0                         12.0  ...   \n",
       "1                           0.0                         12.0  ...   \n",
       "4                           0.0                         12.0  ...   \n",
       "5                           0.0                         12.0  ...   \n",
       "8                           4.0                        484.0  ...   \n",
       "...                         ...                          ...  ...   \n",
       "3119340                     1.0                          6.0  ...   \n",
       "3119341                     1.0                          6.0  ...   \n",
       "3119342                     1.0                          6.0  ...   \n",
       "3119343                     0.0                         12.0  ...   \n",
       "3119344                     1.0                          6.0  ...   \n",
       "\n",
       "         min_seg_size_forward  Active Mean  Active Std  Active Max  \\\n",
       "0                        20.0          0.0         0.0         0.0   \n",
       "1                        20.0          0.0         0.0         0.0   \n",
       "4                        20.0          0.0         0.0         0.0   \n",
       "5                        20.0          0.0         0.0         0.0   \n",
       "8                        20.0          0.0         0.0         0.0   \n",
       "...                       ...          ...         ...         ...   \n",
       "3119340                  20.0          0.0         0.0         0.0   \n",
       "3119341                  20.0          0.0         0.0         0.0   \n",
       "3119342                  20.0          0.0         0.0         0.0   \n",
       "3119343                  20.0          0.0         0.0         0.0   \n",
       "3119344                  20.0          0.0         0.0         0.0   \n",
       "\n",
       "         Active Min  Idle Mean  Idle Std  Idle Max  Idle Min  attack_label  \n",
       "0               0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "1               0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "4               0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "5               0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "8               0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "...             ...        ...       ...       ...       ...           ...  \n",
       "3119340         0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "3119341         0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "3119342         0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "3119343         0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "3119344         0.0        0.0       0.0       0.0       0.0        BENIGN  \n",
       "\n",
       "[2827677 rows x 83 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "635a9dbd-bc31-4d27-8174-dee3ce770492",
   "metadata": {},
   "outputs": [],
   "source": [
    "start_value = 1\n",
    "end_value = start_value + len(df)\n",
    "df['flow_id'] = range(start_value, end_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "d6bd7eea-b4d6-4d9e-b2a7-ea6c6f9edc7f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>Total Length of Fwd Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "      <th>flow_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>484.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119340</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2827673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119341</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2827674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119342</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2827675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119343</th>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2827676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3119344</th>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "      <td>2827677</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             source_ip  source_port destination_ip  destination_port  \\\n",
       "0        8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1        8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "4        8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "5        8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "8         192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...                ...          ...            ...               ...   \n",
       "3119340    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "3119341    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "3119342    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "3119343   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "3119344     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "         protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0             6.0  03/07/2017 08:55:58            4.0                2.0   \n",
       "1             6.0  03/07/2017 08:55:58            1.0                2.0   \n",
       "4             6.0  03/07/2017 08:56:22            3.0                2.0   \n",
       "5             6.0  03/07/2017 08:56:22            1.0                2.0   \n",
       "8             6.0  03/07/2017 08:56:38          609.0                7.0   \n",
       "...           ...                  ...            ...                ...   \n",
       "3119340       6.0        7/7/2017 5:02           61.0                1.0   \n",
       "3119341       6.0        7/7/2017 5:02           72.0                1.0   \n",
       "3119342       6.0        7/7/2017 5:02           75.0                1.0   \n",
       "3119343       6.0        7/7/2017 5:02           48.0                2.0   \n",
       "3119344       6.0        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  Total Length of Fwd Packets  ...  \\\n",
       "0                           0.0                         12.0  ...   \n",
       "1                           0.0                         12.0  ...   \n",
       "4                           0.0                         12.0  ...   \n",
       "5                           0.0                         12.0  ...   \n",
       "8                           4.0                        484.0  ...   \n",
       "...                         ...                          ...  ...   \n",
       "3119340                     1.0                          6.0  ...   \n",
       "3119341                     1.0                          6.0  ...   \n",
       "3119342                     1.0                          6.0  ...   \n",
       "3119343                     0.0                         12.0  ...   \n",
       "3119344                     1.0                          6.0  ...   \n",
       "\n",
       "         Active Mean  Active Std  Active Max  Active Min  Idle Mean  Idle Std  \\\n",
       "0                0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "1                0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "4                0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "5                0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "8                0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "...              ...         ...         ...         ...        ...       ...   \n",
       "3119340          0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "3119341          0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "3119342          0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "3119343          0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "3119344          0.0         0.0         0.0         0.0        0.0       0.0   \n",
       "\n",
       "         Idle Max  Idle Min  attack_label  flow_id  \n",
       "0             0.0       0.0        BENIGN        1  \n",
       "1             0.0       0.0        BENIGN        2  \n",
       "4             0.0       0.0        BENIGN        3  \n",
       "5             0.0       0.0        BENIGN        4  \n",
       "8             0.0       0.0        BENIGN        5  \n",
       "...           ...       ...           ...      ...  \n",
       "3119340       0.0       0.0        BENIGN  2827673  \n",
       "3119341       0.0       0.0        BENIGN  2827674  \n",
       "3119342       0.0       0.0        BENIGN  2827675  \n",
       "3119343       0.0       0.0        BENIGN  2827676  \n",
       "3119344       0.0       0.0        BENIGN  2827677  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "95b4dd91-6640-4452-8f5d-ee4652043ada",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow_id = df.pop('flow_id')\n",
    "df.insert(0, 'flow_id', flow_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "32974ed3-8aeb-45db-8cd8-a3b6aed77f9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.reset_index(drop=True, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "9eb3aa54-2a51-4fab-bf27-659c30207c5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.rename_axis(None, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "a8145f37-508d-42ef-a9da-8246952c7a12",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>2827673</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>2827674</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>2827675</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>2827676</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>2827677</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id      source_ip  source_port destination_ip  destination_port  \\\n",
       "0              1  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1              2  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "2              3  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "3              4  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "4              5   192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...          ...            ...          ...            ...               ...   \n",
       "2827672  2827673    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "2827673  2827674    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "2827674  2827675    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "2827675  2827676   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "2827676  2827677     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "         protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0             6.0  03/07/2017 08:55:58            4.0                2.0   \n",
       "1             6.0  03/07/2017 08:55:58            1.0                2.0   \n",
       "2             6.0  03/07/2017 08:56:22            3.0                2.0   \n",
       "3             6.0  03/07/2017 08:56:22            1.0                2.0   \n",
       "4             6.0  03/07/2017 08:56:38          609.0                7.0   \n",
       "...           ...                  ...            ...                ...   \n",
       "2827672       6.0        7/7/2017 5:02           61.0                1.0   \n",
       "2827673       6.0        7/7/2017 5:02           72.0                1.0   \n",
       "2827674       6.0        7/7/2017 5:02           75.0                1.0   \n",
       "2827675       6.0        7/7/2017 5:02           48.0                2.0   \n",
       "2827676       6.0        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  ...  min_seg_size_forward  Active Mean  \\\n",
       "0                           0.0  ...                  20.0          0.0   \n",
       "1                           0.0  ...                  20.0          0.0   \n",
       "2                           0.0  ...                  20.0          0.0   \n",
       "3                           0.0  ...                  20.0          0.0   \n",
       "4                           4.0  ...                  20.0          0.0   \n",
       "...                         ...  ...                   ...          ...   \n",
       "2827672                     1.0  ...                  20.0          0.0   \n",
       "2827673                     1.0  ...                  20.0          0.0   \n",
       "2827674                     1.0  ...                  20.0          0.0   \n",
       "2827675                     0.0  ...                  20.0          0.0   \n",
       "2827676                     1.0  ...                  20.0          0.0   \n",
       "\n",
       "         Active Std  Active Max  Active Min  Idle Mean  Idle Std  Idle Max  \\\n",
       "0               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "1               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "3               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "4               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "...             ...         ...         ...        ...       ...       ...   \n",
       "2827672         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827673         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827674         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827675         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827676         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "\n",
       "         Idle Min  attack_label  \n",
       "0             0.0        BENIGN  \n",
       "1             0.0        BENIGN  \n",
       "2             0.0        BENIGN  \n",
       "3             0.0        BENIGN  \n",
       "4             0.0        BENIGN  \n",
       "...           ...           ...  \n",
       "2827672       0.0        BENIGN  \n",
       "2827673       0.0        BENIGN  \n",
       "2827674       0.0        BENIGN  \n",
       "2827675       0.0        BENIGN  \n",
       "2827676       0.0        BENIGN  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "1a3bae3d-4008-4bbe-8dd5-eda96fcbe41f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"protocol\"] = df[\"protocol\"].astype(str)\n",
    "df[\"protocol\"] = df[\"protocol\"].apply(lambda x: x.replace(\"6.0\", \"tcp\"))\n",
    "df[\"protocol\"] = df[\"protocol\"].apply(lambda x: x.replace(\"17.0\", \"udp\"))\n",
    "df[\"protocol\"] = df[\"protocol\"].apply(lambda x: x.replace(\"0.0\", \"other\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "12f53d4c-ccb7-45f1-95cb-0b3b7c8fd8c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>2827673</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>2827674</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>2827675</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>2827676</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>2827677</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id      source_ip  source_port destination_ip  destination_port  \\\n",
       "0              1  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1              2  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "2              3  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "3              4  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "4              5   192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...          ...            ...          ...            ...               ...   \n",
       "2827672  2827673    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "2827673  2827674    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "2827674  2827675    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "2827675  2827676   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "2827676  2827677     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "        protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0            tcp  03/07/2017 08:55:58            4.0                2.0   \n",
       "1            tcp  03/07/2017 08:55:58            1.0                2.0   \n",
       "2            tcp  03/07/2017 08:56:22            3.0                2.0   \n",
       "3            tcp  03/07/2017 08:56:22            1.0                2.0   \n",
       "4            tcp  03/07/2017 08:56:38          609.0                7.0   \n",
       "...          ...                  ...            ...                ...   \n",
       "2827672      tcp        7/7/2017 5:02           61.0                1.0   \n",
       "2827673      tcp        7/7/2017 5:02           72.0                1.0   \n",
       "2827674      tcp        7/7/2017 5:02           75.0                1.0   \n",
       "2827675      tcp        7/7/2017 5:02           48.0                2.0   \n",
       "2827676      tcp        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  ...  min_seg_size_forward  Active Mean  \\\n",
       "0                           0.0  ...                  20.0          0.0   \n",
       "1                           0.0  ...                  20.0          0.0   \n",
       "2                           0.0  ...                  20.0          0.0   \n",
       "3                           0.0  ...                  20.0          0.0   \n",
       "4                           4.0  ...                  20.0          0.0   \n",
       "...                         ...  ...                   ...          ...   \n",
       "2827672                     1.0  ...                  20.0          0.0   \n",
       "2827673                     1.0  ...                  20.0          0.0   \n",
       "2827674                     1.0  ...                  20.0          0.0   \n",
       "2827675                     0.0  ...                  20.0          0.0   \n",
       "2827676                     1.0  ...                  20.0          0.0   \n",
       "\n",
       "         Active Std  Active Max  Active Min  Idle Mean  Idle Std  Idle Max  \\\n",
       "0               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "1               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "3               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "4               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "...             ...         ...         ...        ...       ...       ...   \n",
       "2827672         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827673         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827674         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827675         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827676         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "\n",
       "         Idle Min  attack_label  \n",
       "0             0.0        BENIGN  \n",
       "1             0.0        BENIGN  \n",
       "2             0.0        BENIGN  \n",
       "3             0.0        BENIGN  \n",
       "4             0.0        BENIGN  \n",
       "...           ...           ...  \n",
       "2827672       0.0        BENIGN  \n",
       "2827673       0.0        BENIGN  \n",
       "2827674       0.0        BENIGN  \n",
       "2827675       0.0        BENIGN  \n",
       "2827676       0.0        BENIGN  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "6c589dbb-e57a-46f3-a0ff-bd9d1e13e04e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('./CICIDS/Export/CICIDS_Flow.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "377cb6a2-b408-4d5f-87af-867d86c801c2",
   "metadata": {},
   "source": [
    "# Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "718cbaca-41de-433a-8fc3-b5fbee528a9f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_3112316/1366810060.py:1: DtypeWarning: Columns (16,18,37,48,49,50,52,55,63,68,69,70,71,72,73,74,75,76,79,81,82,84,89,90,91,92,93,94,95,96,102,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,130,133,134,135,137,138,141,145,150,151,152,153,154,155,156,157,160,161,162,163,164,166,167,168,169,170,171,172,173,174,175,176,177,178,179,183,184,185,186,187,188,189,190,191,193,194,195,196,197,198,199,201,202,204,205,206) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df1 = pd.read_csv('./CICIDS/output6.csv')\n"
     ]
    }
   ],
   "source": [
    "df1 = pd.read_csv('./CICIDS/output6.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "ef8045a7-12fd-4c27-a82f-87fa3199ef3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.rename(columns={'packet': 'packet_hex', 'payload': 'payload_hex', 'srcip': 'source_ip', 'dstip': 'destination_ip', 'sport': 'source_port', 'dsport': 'destination_port', 'protocol_m': 'protocol'}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "e7adef88-445e-40dc-8631-d71f191c4d72",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>stime</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>sttl</th>\n",
       "      <th>total_len</th>\n",
       "      <th>first_layer</th>\n",
       "      <th>packet_hex</th>\n",
       "      <th>...</th>\n",
       "      <th>DNS qd</th>\n",
       "      <th>Kerberos options</th>\n",
       "      <th>Kerberos address</th>\n",
       "      <th>IP copy_flag</th>\n",
       "      <th>IP optclass</th>\n",
       "      <th>IP option</th>\n",
       "      <th>IP length</th>\n",
       "      <th>IP alert</th>\n",
       "      <th>Kerberos flags</th>\n",
       "      <th>Kerberos sname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>52028.0</td>\n",
       "      <td>23.208.102.104</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb31b8ac6f3607ee0800450000292897400080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35443.0</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034549e400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35441.0</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034a8f0400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.17</td>\n",
       "      <td>43828.0</td>\n",
       "      <td>104.88.54.24</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9b9567080045000034f292400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>55959.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>53.0</td>\n",
       "      <td>udp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>1866da9be37d001e4fd4ca2808004500004f0b71000080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999995</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999996</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999997</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999998</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999999</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf4400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000000 rows × 207 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                stime      source_ip  source_port  destination_ip  \\\n",
       "0        1.499263e+09  192.168.10.14      52028.0  23.208.102.104   \n",
       "1        1.499263e+09  192.168.10.19      35443.0   23.208.106.95   \n",
       "2        1.499263e+09  192.168.10.19      35441.0   23.208.106.95   \n",
       "3        1.499263e+09  192.168.10.17      43828.0    104.88.54.24   \n",
       "4        1.499263e+09  192.168.10.15      55959.0    192.168.10.3   \n",
       "...               ...            ...          ...             ...   \n",
       "9999995  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999996  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999997  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999998  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999999  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "\n",
       "         destination_port protocol   sttl  total_len first_layer  \\\n",
       "0                   443.0      tcp  128.0       41.0    Ethernet   \n",
       "1                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "2                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "3                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "4                    53.0      udp  128.0       79.0    Ethernet   \n",
       "...                   ...      ...    ...        ...         ...   \n",
       "9999995           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999996           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999997           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999998           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999999           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "\n",
       "                                                packet_hex  ... DNS qd  \\\n",
       "0        00c1b114eb31b8ac6f3607ee0800450000292897400080...  ...    NaN   \n",
       "1        00c1b114eb310023ae9badb3080045000034549e400040...  ...    NaN   \n",
       "2        00c1b114eb310023ae9badb3080045000034a8f0400040...  ...    NaN   \n",
       "3        00c1b114eb310023ae9b9567080045000034f292400040...  ...    NaN   \n",
       "4        1866da9be37d001e4fd4ca2808004500004f0b71000080...  ...    NaN   \n",
       "...                                                    ...  ...    ...   \n",
       "9999995  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...    NaN   \n",
       "9999996  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...    NaN   \n",
       "9999997  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...    NaN   \n",
       "9999998  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...    NaN   \n",
       "9999999  001e4fd4ca2800c1b114eb31080045000b903cf4400076...  ...    NaN   \n",
       "\n",
       "         Kerberos options  Kerberos address  IP copy_flag IP optclass  \\\n",
       "0                     NaN               NaN           NaN         NaN   \n",
       "1                     NaN               NaN           NaN         NaN   \n",
       "2                     NaN               NaN           NaN         NaN   \n",
       "3                     NaN               NaN           NaN         NaN   \n",
       "4                     NaN               NaN           NaN         NaN   \n",
       "...                   ...               ...           ...         ...   \n",
       "9999995               NaN               NaN           NaN         NaN   \n",
       "9999996               NaN               NaN           NaN         NaN   \n",
       "9999997               NaN               NaN           NaN         NaN   \n",
       "9999998               NaN               NaN           NaN         NaN   \n",
       "9999999               NaN               NaN           NaN         NaN   \n",
       "\n",
       "         IP option IP length  IP alert Kerberos flags  Kerberos sname  \n",
       "0              NaN       NaN       NaN            NaN             NaN  \n",
       "1              NaN       NaN       NaN            NaN             NaN  \n",
       "2              NaN       NaN       NaN            NaN             NaN  \n",
       "3              NaN       NaN       NaN            NaN             NaN  \n",
       "4              NaN       NaN       NaN            NaN             NaN  \n",
       "...            ...       ...       ...            ...             ...  \n",
       "9999995        NaN       NaN       NaN            NaN             NaN  \n",
       "9999996        NaN       NaN       NaN            NaN             NaN  \n",
       "9999997        NaN       NaN       NaN            NaN             NaN  \n",
       "9999998        NaN       NaN       NaN            NaN             NaN  \n",
       "9999999        NaN       NaN       NaN            NaN             NaN  \n",
       "\n",
       "[10000000 rows x 207 columns]"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "311989a3-8be5-4400-988e-6cd68e213fd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow1 = pd.read_csv('./CICIDS/Export/CICIDS_Flow.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "0c6ba6a7-afc1-4854-9aff-0bb838b355c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>2827673</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>2827674</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>2827675</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>2827676</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>2827677</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id      source_ip  source_port destination_ip  destination_port  \\\n",
       "0              1  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1              2  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "2              3  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "3              4  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "4              5   192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...          ...            ...          ...            ...               ...   \n",
       "2827672  2827673    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "2827673  2827674    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "2827674  2827675    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "2827675  2827676   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "2827676  2827677     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "        protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0            tcp  03/07/2017 08:55:58            4.0                2.0   \n",
       "1            tcp  03/07/2017 08:55:58            1.0                2.0   \n",
       "2            tcp  03/07/2017 08:56:22            3.0                2.0   \n",
       "3            tcp  03/07/2017 08:56:22            1.0                2.0   \n",
       "4            tcp  03/07/2017 08:56:38          609.0                7.0   \n",
       "...          ...                  ...            ...                ...   \n",
       "2827672      tcp        7/7/2017 5:02           61.0                1.0   \n",
       "2827673      tcp        7/7/2017 5:02           72.0                1.0   \n",
       "2827674      tcp        7/7/2017 5:02           75.0                1.0   \n",
       "2827675      tcp        7/7/2017 5:02           48.0                2.0   \n",
       "2827676      tcp        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  ...  min_seg_size_forward  Active Mean  \\\n",
       "0                           0.0  ...                  20.0          0.0   \n",
       "1                           0.0  ...                  20.0          0.0   \n",
       "2                           0.0  ...                  20.0          0.0   \n",
       "3                           0.0  ...                  20.0          0.0   \n",
       "4                           4.0  ...                  20.0          0.0   \n",
       "...                         ...  ...                   ...          ...   \n",
       "2827672                     1.0  ...                  20.0          0.0   \n",
       "2827673                     1.0  ...                  20.0          0.0   \n",
       "2827674                     1.0  ...                  20.0          0.0   \n",
       "2827675                     0.0  ...                  20.0          0.0   \n",
       "2827676                     1.0  ...                  20.0          0.0   \n",
       "\n",
       "         Active Std  Active Max  Active Min  Idle Mean  Idle Std  Idle Max  \\\n",
       "0               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "1               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "3               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "4               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "...             ...         ...         ...        ...       ...       ...   \n",
       "2827672         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827673         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827674         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827675         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827676         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "\n",
       "         Idle Min  attack_label  \n",
       "0             0.0        BENIGN  \n",
       "1             0.0        BENIGN  \n",
       "2             0.0        BENIGN  \n",
       "3             0.0        BENIGN  \n",
       "4             0.0        BENIGN  \n",
       "...           ...           ...  \n",
       "2827672       0.0        BENIGN  \n",
       "2827673       0.0        BENIGN  \n",
       "2827674       0.0        BENIGN  \n",
       "2827675       0.0        BENIGN  \n",
       "2827676       0.0        BENIGN  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "e722d71b-3f14-4b72-a6cf-0d6a752406e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow1 = flow1[['flow_id', 'source_ip', 'source_port', 'destination_ip', 'destination_port', 'protocol']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bf3d9cba-0ed8-4174-b499-c7b3325483f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:55:58</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>03/07/2017 08:56:38</td>\n",
       "      <td>609.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>2827673</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>61.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>2827674</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>72.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>2827675</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>2827676</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>48.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>2827677</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 5:02</td>\n",
       "      <td>68.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id      source_ip  source_port destination_ip  destination_port  \\\n",
       "0              1  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "1              2  8.254.250.126         80.0   192.168.10.5           49188.0   \n",
       "2              3  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "3              4  8.253.185.121         80.0  192.168.10.14           49486.0   \n",
       "4              5   192.168.10.9       1031.0   192.168.10.3              88.0   \n",
       "...          ...            ...          ...            ...               ...   \n",
       "2827672  2827673    72.21.91.29         80.0  192.168.10.15           61374.0   \n",
       "2827673  2827674    72.21.91.29         80.0  192.168.10.15           61378.0   \n",
       "2827674  2827675    72.21.91.29         80.0  192.168.10.15           61375.0   \n",
       "2827675  2827676   8.41.222.187         80.0  192.168.10.15           61323.0   \n",
       "2827676  2827677     8.43.72.21         80.0  192.168.10.15           61326.0   \n",
       "\n",
       "        protocol            Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0            tcp  03/07/2017 08:55:58            4.0                2.0   \n",
       "1            tcp  03/07/2017 08:55:58            1.0                2.0   \n",
       "2            tcp  03/07/2017 08:56:22            3.0                2.0   \n",
       "3            tcp  03/07/2017 08:56:22            1.0                2.0   \n",
       "4            tcp  03/07/2017 08:56:38          609.0                7.0   \n",
       "...          ...                  ...            ...                ...   \n",
       "2827672      tcp        7/7/2017 5:02           61.0                1.0   \n",
       "2827673      tcp        7/7/2017 5:02           72.0                1.0   \n",
       "2827674      tcp        7/7/2017 5:02           75.0                1.0   \n",
       "2827675      tcp        7/7/2017 5:02           48.0                2.0   \n",
       "2827676      tcp        7/7/2017 5:02           68.0                1.0   \n",
       "\n",
       "         Total Backward Packets  ...  min_seg_size_forward  Active Mean  \\\n",
       "0                           0.0  ...                  20.0          0.0   \n",
       "1                           0.0  ...                  20.0          0.0   \n",
       "2                           0.0  ...                  20.0          0.0   \n",
       "3                           0.0  ...                  20.0          0.0   \n",
       "4                           4.0  ...                  20.0          0.0   \n",
       "...                         ...  ...                   ...          ...   \n",
       "2827672                     1.0  ...                  20.0          0.0   \n",
       "2827673                     1.0  ...                  20.0          0.0   \n",
       "2827674                     1.0  ...                  20.0          0.0   \n",
       "2827675                     0.0  ...                  20.0          0.0   \n",
       "2827676                     1.0  ...                  20.0          0.0   \n",
       "\n",
       "         Active Std  Active Max  Active Min  Idle Mean  Idle Std  Idle Max  \\\n",
       "0               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "1               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "3               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "4               0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "...             ...         ...         ...        ...       ...       ...   \n",
       "2827672         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827673         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827674         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827675         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "2827676         0.0         0.0         0.0        0.0       0.0       0.0   \n",
       "\n",
       "         Idle Min  attack_label  \n",
       "0             0.0        BENIGN  \n",
       "1             0.0        BENIGN  \n",
       "2             0.0        BENIGN  \n",
       "3             0.0        BENIGN  \n",
       "4             0.0        BENIGN  \n",
       "...           ...           ...  \n",
       "2827672       0.0        BENIGN  \n",
       "2827673       0.0        BENIGN  \n",
       "2827674       0.0        BENIGN  \n",
       "2827675       0.0        BENIGN  \n",
       "2827676       0.0        BENIGN  \n",
       "\n",
       "[2827677 rows x 84 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "3ff79d22-39a9-4c6f-ac45-e63e3cfee46c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on an independent (deep) copy so that flow1 itself is left untouched.\n",
    "flow2 = flow1.copy(deep=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "8ccc8db5-2d78-4a89-94dd-3677a1f37a56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the reverse-direction view of every flow by swapping the source and\n",
    "# destination endpoint column names (IP and port).\n",
    "#\n",
    "# flow2 is derived from flow1 with a non-mutating rename instead of renaming\n",
    "# flow2 in place: an in-place swap is undone when the cell is run a second\n",
    "# time, which would silently leave flow2 identical to flow1. Deriving it from\n",
    "# flow1 makes the cell idempotent and leaves flow1 unchanged.\n",
    "flow2 = flow1.rename(\n",
    "    columns={\n",
    "        'source_ip': 'destination_ip',\n",
    "        'destination_ip': 'source_ip',\n",
    "        'source_port': 'destination_port',\n",
    "        'destination_port': 'source_port',\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "f7aefd04-d9c2-4f55-991d-f062bcb10d71",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>2827673</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>2827674</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>2827675</td>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>2827676</td>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>2827677</td>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id destination_ip  destination_port      source_ip  source_port  \\\n",
       "0              1  8.254.250.126              80.0   192.168.10.5      49188.0   \n",
       "1              2  8.254.250.126              80.0   192.168.10.5      49188.0   \n",
       "2              3  8.253.185.121              80.0  192.168.10.14      49486.0   \n",
       "3              4  8.253.185.121              80.0  192.168.10.14      49486.0   \n",
       "4              5   192.168.10.9            1031.0   192.168.10.3         88.0   \n",
       "...          ...            ...               ...            ...          ...   \n",
       "2827672  2827673    72.21.91.29              80.0  192.168.10.15      61374.0   \n",
       "2827673  2827674    72.21.91.29              80.0  192.168.10.15      61378.0   \n",
       "2827674  2827675    72.21.91.29              80.0  192.168.10.15      61375.0   \n",
       "2827675  2827676   8.41.222.187              80.0  192.168.10.15      61323.0   \n",
       "2827676  2827677     8.43.72.21              80.0  192.168.10.15      61326.0   \n",
       "\n",
       "        protocol  \n",
       "0            tcp  \n",
       "1            tcp  \n",
       "2            tcp  \n",
       "3            tcp  \n",
       "4            tcp  \n",
       "...          ...  \n",
       "2827672      tcp  \n",
       "2827673      tcp  \n",
       "2827674      tcp  \n",
       "2827675      tcp  \n",
       "2827676      tcp  \n",
       "\n",
       "[2827677 rows x 6 columns]"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "e17913e8-2c7f-4592-9c30-ef4047188b47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack flow1 on top of flow2 row-wise. pd.concat matches columns by name,\n",
    "# and because ignore_index is not set the row labels of both inputs are kept\n",
    "# as they are (so labels repeat across the two halves).\n",
    "flow = pd.concat((flow1, flow2), axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "219b9bbd-6238-4e19-8286-6bc22aad1858",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1032.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1033.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827623</th>\n",
       "      <td>2827624</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.51</td>\n",
       "      <td>60852.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827624</th>\n",
       "      <td>2827625</td>\n",
       "      <td>184.84.243.218</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>10399.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827625</th>\n",
       "      <td>2827626</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>56007.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827636</th>\n",
       "      <td>2827637</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>50690.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827641</th>\n",
       "      <td>2827642</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>445.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>10414.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2169274 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id       source_ip  source_port destination_ip  \\\n",
       "0              1   8.254.250.126         80.0   192.168.10.5   \n",
       "2              3   8.253.185.121         80.0  192.168.10.14   \n",
       "4              5    192.168.10.9       1031.0   192.168.10.3   \n",
       "5              6    192.168.10.9       1032.0   192.168.10.3   \n",
       "6              7    192.168.10.9       1033.0   192.168.10.3   \n",
       "...          ...             ...          ...            ...   \n",
       "2827623  2827624    192.168.10.3        389.0  192.168.10.51   \n",
       "2827624  2827625  184.84.243.218         80.0   192.168.10.8   \n",
       "2827625  2827626    192.168.10.3        389.0   192.168.10.8   \n",
       "2827636  2827637    192.168.10.3        389.0   192.168.10.8   \n",
       "2827641  2827642    192.168.10.3        445.0   192.168.10.8   \n",
       "\n",
       "         destination_port protocol  \n",
       "0                 49188.0      tcp  \n",
       "2                 49486.0      tcp  \n",
       "4                    88.0      tcp  \n",
       "5                    88.0      tcp  \n",
       "6                    88.0      tcp  \n",
       "...                   ...      ...  \n",
       "2827623           60852.0      udp  \n",
       "2827624           10399.0      tcp  \n",
       "2827625           56007.0      udp  \n",
       "2827636           50690.0      udp  \n",
       "2827641           10414.0      tcp  \n",
       "\n",
       "[2169274 rows x 6 columns]"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "a67aeb21-9a02-493c-8af0-e8d25f3ae426",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows count as duplicates when every column other than flow_id matches;\n",
    "# with the default keep setting the first occurrence is the one retained.\n",
    "# The frame is modified in place.\n",
    "flow.drop_duplicates(\n",
    "    subset=[col for col in flow.columns if col != 'flow_id'],\n",
    "    inplace=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "7469ed0d-1794-4703-9091-6bc603d8c63f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1032.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1033.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827623</th>\n",
       "      <td>2827624</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.51</td>\n",
       "      <td>60852.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827624</th>\n",
       "      <td>2827625</td>\n",
       "      <td>184.84.243.218</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>10399.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827625</th>\n",
       "      <td>2827626</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>56007.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827636</th>\n",
       "      <td>2827637</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>389.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>50690.0</td>\n",
       "      <td>udp</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827641</th>\n",
       "      <td>2827642</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>445.0</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>10414.0</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2169274 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id       source_ip  source_port destination_ip  \\\n",
       "0              1   8.254.250.126         80.0   192.168.10.5   \n",
       "2              3   8.253.185.121         80.0  192.168.10.14   \n",
       "4              5    192.168.10.9       1031.0   192.168.10.3   \n",
       "5              6    192.168.10.9       1032.0   192.168.10.3   \n",
       "6              7    192.168.10.9       1033.0   192.168.10.3   \n",
       "...          ...             ...          ...            ...   \n",
       "2827623  2827624    192.168.10.3        389.0  192.168.10.51   \n",
       "2827624  2827625  184.84.243.218         80.0   192.168.10.8   \n",
       "2827625  2827626    192.168.10.3        389.0   192.168.10.8   \n",
       "2827636  2827637    192.168.10.3        389.0   192.168.10.8   \n",
       "2827641  2827642    192.168.10.3        445.0   192.168.10.8   \n",
       "\n",
       "         destination_port protocol  \n",
       "0                 49188.0      tcp  \n",
       "2                 49486.0      tcp  \n",
       "4                    88.0      tcp  \n",
       "5                    88.0      tcp  \n",
       "6                    88.0      tcp  \n",
       "...                   ...      ...  \n",
       "2827623           60852.0      udp  \n",
       "2827624           10399.0      tcp  \n",
       "2827625           56007.0      udp  \n",
       "2827636           50690.0      udp  \n",
       "2827641           10414.0      tcp  \n",
       "\n",
       "[2169274 rows x 6 columns]"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "60ec73b2-672a-48c0-aedb-48700d42faea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>stime</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>sttl</th>\n",
       "      <th>total_len</th>\n",
       "      <th>first_layer</th>\n",
       "      <th>packet_hex</th>\n",
       "      <th>...</th>\n",
       "      <th>DNS qd</th>\n",
       "      <th>Kerberos options</th>\n",
       "      <th>Kerberos address</th>\n",
       "      <th>IP copy_flag</th>\n",
       "      <th>IP optclass</th>\n",
       "      <th>IP option</th>\n",
       "      <th>IP length</th>\n",
       "      <th>IP alert</th>\n",
       "      <th>Kerberos flags</th>\n",
       "      <th>Kerberos sname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>52028.0</td>\n",
       "      <td>23.208.102.104</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb31b8ac6f3607ee0800450000292897400080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35443.0</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034549e400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35441.0</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034a8f0400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.17</td>\n",
       "      <td>43828.0</td>\n",
       "      <td>104.88.54.24</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9b9567080045000034f292400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>55959.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>53.0</td>\n",
       "      <td>udp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>1866da9be37d001e4fd4ca2808004500004f0b71000080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999995</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999996</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999997</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999998</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999999</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf4400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000000 rows × 207 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                stime      source_ip  source_port  destination_ip  \\\n",
       "0        1.499263e+09  192.168.10.14      52028.0  23.208.102.104   \n",
       "1        1.499263e+09  192.168.10.19      35443.0   23.208.106.95   \n",
       "2        1.499263e+09  192.168.10.19      35441.0   23.208.106.95   \n",
       "3        1.499263e+09  192.168.10.17      43828.0    104.88.54.24   \n",
       "4        1.499263e+09  192.168.10.15      55959.0    192.168.10.3   \n",
       "...               ...            ...          ...             ...   \n",
       "9999995  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999996  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999997  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999998  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "9999999  1.499343e+09    13.107.4.50         80.0   192.168.10.15   \n",
       "\n",
       "         destination_port protocol   sttl  total_len first_layer  \\\n",
       "0                   443.0      tcp  128.0       41.0    Ethernet   \n",
       "1                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "2                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "3                   443.0      tcp   64.0       52.0    Ethernet   \n",
       "4                    53.0      udp  128.0       79.0    Ethernet   \n",
       "...                   ...      ...    ...        ...         ...   \n",
       "9999995           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999996           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999997           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999998           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "9999999           49910.0      tcp  118.0     2960.0    Ethernet   \n",
       "\n",
       "                                                packet_hex  ... DNS qd  \\\n",
       "0        00c1b114eb31b8ac6f3607ee0800450000292897400080...  ...    NaN   \n",
       "1        00c1b114eb310023ae9badb3080045000034549e400040...  ...    NaN   \n",
       "2        00c1b114eb310023ae9badb3080045000034a8f0400040...  ...    NaN   \n",
       "3        00c1b114eb310023ae9b9567080045000034f292400040...  ...    NaN   \n",
       "4        1866da9be37d001e4fd4ca2808004500004f0b71000080...  ...    NaN   \n",
       "...                                                    ...  ...    ...   \n",
       "9999995  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...    NaN   \n",
       "9999996  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...    NaN   \n",
       "9999997  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...    NaN   \n",
       "9999998  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...    NaN   \n",
       "9999999  001e4fd4ca2800c1b114eb31080045000b903cf4400076...  ...    NaN   \n",
       "\n",
       "         Kerberos options  Kerberos address  IP copy_flag IP optclass  \\\n",
       "0                     NaN               NaN           NaN         NaN   \n",
       "1                     NaN               NaN           NaN         NaN   \n",
       "2                     NaN               NaN           NaN         NaN   \n",
       "3                     NaN               NaN           NaN         NaN   \n",
       "4                     NaN               NaN           NaN         NaN   \n",
       "...                   ...               ...           ...         ...   \n",
       "9999995               NaN               NaN           NaN         NaN   \n",
       "9999996               NaN               NaN           NaN         NaN   \n",
       "9999997               NaN               NaN           NaN         NaN   \n",
       "9999998               NaN               NaN           NaN         NaN   \n",
       "9999999               NaN               NaN           NaN         NaN   \n",
       "\n",
       "         IP option IP length  IP alert Kerberos flags  Kerberos sname  \n",
       "0              NaN       NaN       NaN            NaN             NaN  \n",
       "1              NaN       NaN       NaN            NaN             NaN  \n",
       "2              NaN       NaN       NaN            NaN             NaN  \n",
       "3              NaN       NaN       NaN            NaN             NaN  \n",
       "4              NaN       NaN       NaN            NaN             NaN  \n",
       "...            ...       ...       ...            ...             ...  \n",
       "9999995        NaN       NaN       NaN            NaN             NaN  \n",
       "9999996        NaN       NaN       NaN            NaN             NaN  \n",
       "9999997        NaN       NaN       NaN            NaN             NaN  \n",
       "9999998        NaN       NaN       NaN            NaN             NaN  \n",
       "9999999        NaN       NaN       NaN            NaN             NaN  \n",
       "\n",
       "[10000000 rows x 207 columns]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show df1 as the cell result; pandas abbreviates large frames in the rendered output.\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "19ae7493-353c-4fa2-8cdc-a82d1bf73618",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The port columns are displayed as floats (e.g. 52028.0); cast both to int.\n",
    "# Presumably this aligns their dtype with the same columns in flow before merging - confirm.\n",
    "for port_col in ('source_port', 'destination_port'):\n",
    "    df1[port_col] = df1[port_col].astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "ff0ed3c7-5937-48f1-ba25-c2a7bbe78077",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns used as the join key when merging df1 with flow.\n",
    "columns_to_match = [\n",
    "    'source_ip',\n",
    "    'source_port',\n",
    "    'destination_ip',\n",
    "    'destination_port',\n",
    "    'protocol',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "20eae60d-d3fb-4fe0-8e2a-2454d86380d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left join: keep every row of df1 and attach the matching columns from flow.\n",
    "merged_df = df1.merge(flow, on=columns_to_match, how='left')\n",
    "# A left join silently multiplies rows when a key occurs more than once in flow;\n",
    "# fail loudly here instead of carrying duplicated rows into later cells.\n",
    "assert len(merged_df) == len(df1), 'merge changed the row count: duplicate keys in flow'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "acc23f40-dea2-460f-94ab-116992d9809f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>stime</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>sttl</th>\n",
       "      <th>total_len</th>\n",
       "      <th>first_layer</th>\n",
       "      <th>packet_hex</th>\n",
       "      <th>...</th>\n",
       "      <th>Kerberos options</th>\n",
       "      <th>Kerberos address</th>\n",
       "      <th>IP copy_flag</th>\n",
       "      <th>IP optclass</th>\n",
       "      <th>IP option</th>\n",
       "      <th>IP length</th>\n",
       "      <th>IP alert</th>\n",
       "      <th>Kerberos flags</th>\n",
       "      <th>Kerberos sname</th>\n",
       "      <th>flow_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>52028</td>\n",
       "      <td>23.208.102.104</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb31b8ac6f3607ee0800450000292897400080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1206778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35443</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034549e400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1211004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35441</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9badb3080045000034a8f0400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1211010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.17</td>\n",
       "      <td>43828</td>\n",
       "      <td>104.88.54.24</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>00c1b114eb310023ae9b9567080045000034f292400040...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1222888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>55959</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>1866da9be37d001e4fd4ca2808004500004f0b71000080...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>904284</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999995</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1960483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999996</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf8400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1960483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999997</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1960483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999998</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf6400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1960483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999999</th>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>001e4fd4ca2800c1b114eb31080045000b903cf4400076...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1960483</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000000 rows × 208 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                stime      source_ip  source_port  destination_ip  \\\n",
       "0        1.499263e+09  192.168.10.14        52028  23.208.102.104   \n",
       "1        1.499263e+09  192.168.10.19        35443   23.208.106.95   \n",
       "2        1.499263e+09  192.168.10.19        35441   23.208.106.95   \n",
       "3        1.499263e+09  192.168.10.17        43828    104.88.54.24   \n",
       "4        1.499263e+09  192.168.10.15        55959    192.168.10.3   \n",
       "...               ...            ...          ...             ...   \n",
       "9999995  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999996  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999997  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999998  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999999  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "\n",
       "         destination_port protocol   sttl  total_len first_layer  \\\n",
       "0                     443      tcp  128.0       41.0    Ethernet   \n",
       "1                     443      tcp   64.0       52.0    Ethernet   \n",
       "2                     443      tcp   64.0       52.0    Ethernet   \n",
       "3                     443      tcp   64.0       52.0    Ethernet   \n",
       "4                      53      udp  128.0       79.0    Ethernet   \n",
       "...                   ...      ...    ...        ...         ...   \n",
       "9999995             49910      tcp  118.0     2960.0    Ethernet   \n",
       "9999996             49910      tcp  118.0     2960.0    Ethernet   \n",
       "9999997             49910      tcp  118.0     2960.0    Ethernet   \n",
       "9999998             49910      tcp  118.0     2960.0    Ethernet   \n",
       "9999999             49910      tcp  118.0     2960.0    Ethernet   \n",
       "\n",
       "                                                packet_hex  ...  \\\n",
       "0        00c1b114eb31b8ac6f3607ee0800450000292897400080...  ...   \n",
       "1        00c1b114eb310023ae9badb3080045000034549e400040...  ...   \n",
       "2        00c1b114eb310023ae9badb3080045000034a8f0400040...  ...   \n",
       "3        00c1b114eb310023ae9b9567080045000034f292400040...  ...   \n",
       "4        1866da9be37d001e4fd4ca2808004500004f0b71000080...  ...   \n",
       "...                                                    ...  ...   \n",
       "9999995  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...   \n",
       "9999996  001e4fd4ca2800c1b114eb31080045000b903cf8400076...  ...   \n",
       "9999997  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...   \n",
       "9999998  001e4fd4ca2800c1b114eb31080045000b903cf6400076...  ...   \n",
       "9999999  001e4fd4ca2800c1b114eb31080045000b903cf4400076...  ...   \n",
       "\n",
       "        Kerberos options  Kerberos address  IP copy_flag  IP optclass  \\\n",
       "0                    NaN               NaN           NaN          NaN   \n",
       "1                    NaN               NaN           NaN          NaN   \n",
       "2                    NaN               NaN           NaN          NaN   \n",
       "3                    NaN               NaN           NaN          NaN   \n",
       "4                    NaN               NaN           NaN          NaN   \n",
       "...                  ...               ...           ...          ...   \n",
       "9999995              NaN               NaN           NaN          NaN   \n",
       "9999996              NaN               NaN           NaN          NaN   \n",
       "9999997              NaN               NaN           NaN          NaN   \n",
       "9999998              NaN               NaN           NaN          NaN   \n",
       "9999999              NaN               NaN           NaN          NaN   \n",
       "\n",
       "        IP option  IP length IP alert  Kerberos flags Kerberos sname  flow_id  \n",
       "0             NaN        NaN      NaN             NaN            NaN  1206778  \n",
       "1             NaN        NaN      NaN             NaN            NaN  1211004  \n",
       "2             NaN        NaN      NaN             NaN            NaN  1211010  \n",
       "3             NaN        NaN      NaN             NaN            NaN  1222888  \n",
       "4             NaN        NaN      NaN             NaN            NaN   904284  \n",
       "...           ...        ...      ...             ...            ...      ...  \n",
       "9999995       NaN        NaN      NaN             NaN            NaN  1960483  \n",
       "9999996       NaN        NaN      NaN             NaN            NaN  1960483  \n",
       "9999997       NaN        NaN      NaN             NaN            NaN  1960483  \n",
       "9999998       NaN        NaN      NaN             NaN            NaN  1960483  \n",
       "9999999       NaN        NaN      NaN             NaN            NaN  1960483  \n",
       "\n",
       "[10000000 rows x 208 columns]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the merged frame to check the column added by the merge.\n",
    "merged_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "e5e9e9b8-d64c-4aa4-982f-4b5fe04a10b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Move flow_id to the front: pop removes the column from merged_df in place,\n",
    "# then insert puts it back at position 0.\n",
    "flow_id = merged_df.pop(item='flow_id')\n",
    "merged_df.insert(loc=0, column='flow_id', value=flow_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "acae3b63-03ba-4f65-ae1e-29e373a6c3e7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>stime</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>sttl</th>\n",
       "      <th>total_len</th>\n",
       "      <th>first_layer</th>\n",
       "      <th>...</th>\n",
       "      <th>DNS qd</th>\n",
       "      <th>Kerberos options</th>\n",
       "      <th>Kerberos address</th>\n",
       "      <th>IP copy_flag</th>\n",
       "      <th>IP optclass</th>\n",
       "      <th>IP option</th>\n",
       "      <th>IP length</th>\n",
       "      <th>IP alert</th>\n",
       "      <th>Kerberos flags</th>\n",
       "      <th>Kerberos sname</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1206778</td>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>52028</td>\n",
       "      <td>23.208.102.104</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1211004</td>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35443</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1211010</td>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.19</td>\n",
       "      <td>35441</td>\n",
       "      <td>23.208.106.95</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1222888</td>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.17</td>\n",
       "      <td>43828</td>\n",
       "      <td>104.88.54.24</td>\n",
       "      <td>443</td>\n",
       "      <td>tcp</td>\n",
       "      <td>64.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>904284</td>\n",
       "      <td>1.499263e+09</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>55959</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>128.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999995</th>\n",
       "      <td>1960483</td>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999996</th>\n",
       "      <td>1960483</td>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999997</th>\n",
       "      <td>1960483</td>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999998</th>\n",
       "      <td>1960483</td>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999999</th>\n",
       "      <td>1960483</td>\n",
       "      <td>1.499343e+09</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>49910</td>\n",
       "      <td>tcp</td>\n",
       "      <td>118.0</td>\n",
       "      <td>2960.0</td>\n",
       "      <td>Ethernet</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000000 rows × 208 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id         stime      source_ip  source_port  destination_ip  \\\n",
       "0        1206778  1.499263e+09  192.168.10.14        52028  23.208.102.104   \n",
       "1        1211004  1.499263e+09  192.168.10.19        35443   23.208.106.95   \n",
       "2        1211010  1.499263e+09  192.168.10.19        35441   23.208.106.95   \n",
       "3        1222888  1.499263e+09  192.168.10.17        43828    104.88.54.24   \n",
       "4         904284  1.499263e+09  192.168.10.15        55959    192.168.10.3   \n",
       "...          ...           ...            ...          ...             ...   \n",
       "9999995  1960483  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999996  1960483  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999997  1960483  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999998  1960483  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "9999999  1960483  1.499343e+09    13.107.4.50           80   192.168.10.15   \n",
       "\n",
       "         destination_port protocol   sttl  total_len first_layer  ... DNS qd  \\\n",
       "0                     443      tcp  128.0       41.0    Ethernet  ...    NaN   \n",
       "1                     443      tcp   64.0       52.0    Ethernet  ...    NaN   \n",
       "2                     443      tcp   64.0       52.0    Ethernet  ...    NaN   \n",
       "3                     443      tcp   64.0       52.0    Ethernet  ...    NaN   \n",
       "4                      53      udp  128.0       79.0    Ethernet  ...    NaN   \n",
       "...                   ...      ...    ...        ...         ...  ...    ...   \n",
       "9999995             49910      tcp  118.0     2960.0    Ethernet  ...    NaN   \n",
       "9999996             49910      tcp  118.0     2960.0    Ethernet  ...    NaN   \n",
       "9999997             49910      tcp  118.0     2960.0    Ethernet  ...    NaN   \n",
       "9999998             49910      tcp  118.0     2960.0    Ethernet  ...    NaN   \n",
       "9999999             49910      tcp  118.0     2960.0    Ethernet  ...    NaN   \n",
       "\n",
       "        Kerberos options  Kerberos address  IP copy_flag  IP optclass  \\\n",
       "0                    NaN               NaN           NaN          NaN   \n",
       "1                    NaN               NaN           NaN          NaN   \n",
       "2                    NaN               NaN           NaN          NaN   \n",
       "3                    NaN               NaN           NaN          NaN   \n",
       "4                    NaN               NaN           NaN          NaN   \n",
       "...                  ...               ...           ...          ...   \n",
       "9999995              NaN               NaN           NaN          NaN   \n",
       "9999996              NaN               NaN           NaN          NaN   \n",
       "9999997              NaN               NaN           NaN          NaN   \n",
       "9999998              NaN               NaN           NaN          NaN   \n",
       "9999999              NaN               NaN           NaN          NaN   \n",
       "\n",
       "        IP option  IP length IP alert  Kerberos flags Kerberos sname  \n",
       "0             NaN        NaN      NaN             NaN            NaN  \n",
       "1             NaN        NaN      NaN             NaN            NaN  \n",
       "2             NaN        NaN      NaN             NaN            NaN  \n",
       "3             NaN        NaN      NaN             NaN            NaN  \n",
       "4             NaN        NaN      NaN             NaN            NaN  \n",
       "...           ...        ...      ...             ...            ...  \n",
       "9999995       NaN        NaN      NaN             NaN            NaN  \n",
       "9999996       NaN        NaN      NaN             NaN            NaN  \n",
       "9999997       NaN        NaN      NaN             NaN            NaN  \n",
       "9999998       NaN        NaN      NaN             NaN            NaN  \n",
       "9999999       NaN        NaN      NaN             NaN            NaN  \n",
       "\n",
       "[10000000 rows x 208 columns]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the merged frame; pandas elides the middle rows/columns in the repr.\n",
    "merged_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07d3921f-3c6a-4877-acd6-223a80b63c35",
   "metadata": {},
   "source": [
    "# Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "25aac04e-1ba9-4ce0-90c9-ee360b0d8db4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df.flow_id.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 271,
   "id": "586eced8-7534-4ec0-9ea2-f87286605479",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "stime                                1421927414\n",
       "source_ip                           10.40.182.3\n",
       "source_port                                   0\n",
       "destination_ip                      10.40.182.1\n",
       "destination_port                              0\n",
       "                                       ...     \n",
       "HSRP MD5 Authentication sourceip            NaN\n",
       "HSRP MD5 Authentication keyid               NaN\n",
       "SCTPChunkInit addr                          NaN\n",
       "flow_id                                148629.0\n",
       "protocol_y                                  arp\n",
       "Name: 13, Length: 389, dtype: object"
      ]
     },
     "execution_count": 271,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check one merged row, selected by integer position.\n",
    "# NOTE(review): the recorded output lists 389 fields (incl. protocol_y), unlike the\n",
    "# 208-column merged_df displayed earlier -- this cell last ran against a different\n",
    "# merged_df; re-run after Restart & Run All.\n",
    "merged_df.iloc[13]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "fdba8adc-d5eb-4c80-9eec-d02f09a272e2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [flow_id, source_ip, source_port, destination_ip, destination_port, protocol]\n",
       "Index: []"
      ]
     },
     "execution_count": 254,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow[flow.protocol=='others']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "id": "3c36a765-96e3-4b67-a3e2-c59065081a5f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1966462</th>\n",
       "      <td>521</td>\n",
       "      <td>10.40.182.3</td>\n",
       "      <td>0</td>\n",
       "      <td>10.40.182.1</td>\n",
       "      <td>0</td>\n",
       "      <td>others</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         flow_id    source_ip  source_port destination_ip  destination_port  \\\n",
       "1966462      521  10.40.182.3            0    10.40.182.1                 0   \n",
       "\n",
       "        protocol  \n",
       "1966462   others  "
      ]
     },
     "execution_count": 272,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow[(flow['source_ip']=='10.40.182.3') & (flow['destination_ip']=='10.40.182.1') & (flow['source_port']==0)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "id": "0ed49900-c32b-4183-8f3a-e00a28d29984",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>111536</th>\n",
       "      <td>115902</td>\n",
       "      <td>59.166.0.3</td>\n",
       "      <td>6103</td>\n",
       "      <td>149.171.126.4</td>\n",
       "      <td>52633</td>\n",
       "      <td>tcp</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        flow_id   source_ip source_port destination_ip destination_port  \\\n",
       "111536   115902  59.166.0.3        6103  149.171.126.4            52633   \n",
       "\n",
       "       protocol  \n",
       "111536      tcp  "
      ]
     },
     "execution_count": 212,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow[(flow['source_ip']=='59.166.0.3') & (flow['destination_ip']=='149.171.126.4') & (flow['source_port']==6103) & (flow['destination_port']=='52633')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "7ffb1cd4-bb8f-4a3c-aa32-2b7b9170e375",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NaN         9935521\n",
       "664410.0       1904\n",
       "791440.0       1312\n",
       "806229.0       1308\n",
       "752940.0       1308\n",
       "             ...   \n",
       "346583.0          4\n",
       "723549.0          4\n",
       "821934.0          2\n",
       "189265.0          2\n",
       "149675.0          2\n",
       "Name: flow_id, Length: 847, dtype: int64"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged_df.flow_id.value_counts(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0e704965-7160-4537-91b6-3e94d8c2807f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unbind `df` so the frame it referred to can be garbage-collected (if nothing else\n",
    "# references it) before `df` is reloaded from a CSV in the next cell.\n",
    "del df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "29af2c50-596a-4e1c-a28a-ee3f8ffcb376",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_3440497/1533487670.py:1: DtypeWarning: Columns (25,37,38,40,43,51,56,57,58,59,60,61,63,64,65,66,67,70,71,72,73,74,75,76,78,83,84,85,86,87,88,89,90,96,100,101,102,103,104,105,106,107,108,109,110,111,112,113,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,134,138,142,144,145,148,149,150,152) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  df = pd.read_csv('./CICIDS/Export/Packet-Fields/Packet_Fields_File_1.csv')\n"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv('./CICIDS/Export/Packet-Fields/Packet_Fields_File_1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c1b54e30-9d49-4335-b6d5-30c2ddca692c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>packet_id</th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>packet_hex</th>\n",
       "      <th>payload_hex</th>\n",
       "      <th>Ethernet dst</th>\n",
       "      <th>...</th>\n",
       "      <th>PPTP data</th>\n",
       "      <th>PPTP Incoming Call Connected len</th>\n",
       "      <th>PPTP Incoming Call Connected type</th>\n",
       "      <th>PPTP Outgoing Call Reply len</th>\n",
       "      <th>PPTP Outgoing Call Reply type</th>\n",
       "      <th>PPTP Outgoing Call Reply call_id</th>\n",
       "      <th>Skinny len</th>\n",
       "      <th>Skinny res</th>\n",
       "      <th>Skinny msg</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11952482</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b905b26400077...</td>\n",
       "      <td>b0324fc9a453eadd23fefd8cc2b96da89210015fd420ab...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11952483</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b905b28400077...</td>\n",
       "      <td>6f2d76414e7acb086a62445556218cd2f21ad54483cd95...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11952485</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb310800450005dc5b2a400077...</td>\n",
       "      <td>9b42249338c0f7bd5019393a265532dbae266d32a8418c...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>11952487</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b905b2b400077...</td>\n",
       "      <td>582baa0225e7949925e6747162dbc2f350c554ab64ee36...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>11952489</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b905b2d400077...</td>\n",
       "      <td>e11d7365760201b310a2eb99a4430d6bd1f8ebdec35d76...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2588702</th>\n",
       "      <td>16952477</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b901345400077...</td>\n",
       "      <td>ec414b08efb94aa70cf0a5ed9e9654e1d2686aeade1470...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2588703</th>\n",
       "      <td>16952478</td>\n",
       "      <td>542647</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>00c1b114eb31b8ac6f3607ee08004500002852b9400080...</td>\n",
       "      <td>000000000000</td>\n",
       "      <td>00:c1:b1:14:eb:31</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2588704</th>\n",
       "      <td>16952479</td>\n",
       "      <td>548448</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>50128</td>\n",
       "      <td>tcp</td>\n",
       "      <td>b8ac6f3607ee00c1b114eb31080045000b90133f400077...</td>\n",
       "      <td>d7be298a6488d285a1b4cc1bdc0cb1215d68957349ca13...</td>\n",
       "      <td>b8:ac:6f:36:07:ee</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2588705</th>\n",
       "      <td>16952480</td>\n",
       "      <td>545288</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>50023</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>00c1b114eb31001e4fd4ca2808004500002837f2400080...</td>\n",
       "      <td>000000000000</td>\n",
       "      <td>00:c1:b1:14:eb:31</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2588706</th>\n",
       "      <td>16952481</td>\n",
       "      <td>545288</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>50023</td>\n",
       "      <td>13.107.4.50</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>00c1b114eb31001e4fd4ca280800450000283823400080...</td>\n",
       "      <td>000000000000</td>\n",
       "      <td>00:c1:b1:14:eb:31</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2588707 rows × 211 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         packet_id  flow_id      source_ip  source_port destination_ip  \\\n",
       "0         11952482   548448    13.107.4.50           80  192.168.10.14   \n",
       "1         11952483   548448    13.107.4.50           80  192.168.10.14   \n",
       "2         11952485   548448    13.107.4.50           80  192.168.10.14   \n",
       "3         11952487   548448    13.107.4.50           80  192.168.10.14   \n",
       "4         11952489   548448    13.107.4.50           80  192.168.10.14   \n",
       "...            ...      ...            ...          ...            ...   \n",
       "2588702   16952477   548448    13.107.4.50           80  192.168.10.14   \n",
       "2588703   16952478   542647  192.168.10.14        50128    13.107.4.50   \n",
       "2588704   16952479   548448    13.107.4.50           80  192.168.10.14   \n",
       "2588705   16952480   545288  192.168.10.15        50023    13.107.4.50   \n",
       "2588706   16952481   545288  192.168.10.15        50023    13.107.4.50   \n",
       "\n",
       "         destination_port protocol  \\\n",
       "0                   50128      tcp   \n",
       "1                   50128      tcp   \n",
       "2                   50128      tcp   \n",
       "3                   50128      tcp   \n",
       "4                   50128      tcp   \n",
       "...                   ...      ...   \n",
       "2588702             50128      tcp   \n",
       "2588703                80      tcp   \n",
       "2588704             50128      tcp   \n",
       "2588705                80      tcp   \n",
       "2588706                80      tcp   \n",
       "\n",
       "                                                packet_hex  \\\n",
       "0        b8ac6f3607ee00c1b114eb31080045000b905b26400077...   \n",
       "1        b8ac6f3607ee00c1b114eb31080045000b905b28400077...   \n",
       "2        b8ac6f3607ee00c1b114eb310800450005dc5b2a400077...   \n",
       "3        b8ac6f3607ee00c1b114eb31080045000b905b2b400077...   \n",
       "4        b8ac6f3607ee00c1b114eb31080045000b905b2d400077...   \n",
       "...                                                    ...   \n",
       "2588702  b8ac6f3607ee00c1b114eb31080045000b901345400077...   \n",
       "2588703  00c1b114eb31b8ac6f3607ee08004500002852b9400080...   \n",
       "2588704  b8ac6f3607ee00c1b114eb31080045000b90133f400077...   \n",
       "2588705  00c1b114eb31001e4fd4ca2808004500002837f2400080...   \n",
       "2588706  00c1b114eb31001e4fd4ca280800450000283823400080...   \n",
       "\n",
       "                                               payload_hex       Ethernet dst  \\\n",
       "0        b0324fc9a453eadd23fefd8cc2b96da89210015fd420ab...  b8:ac:6f:36:07:ee   \n",
       "1        6f2d76414e7acb086a62445556218cd2f21ad54483cd95...  b8:ac:6f:36:07:ee   \n",
       "2        9b42249338c0f7bd5019393a265532dbae266d32a8418c...  b8:ac:6f:36:07:ee   \n",
       "3        582baa0225e7949925e6747162dbc2f350c554ab64ee36...  b8:ac:6f:36:07:ee   \n",
       "4        e11d7365760201b310a2eb99a4430d6bd1f8ebdec35d76...  b8:ac:6f:36:07:ee   \n",
       "...                                                    ...                ...   \n",
       "2588702  ec414b08efb94aa70cf0a5ed9e9654e1d2686aeade1470...  b8:ac:6f:36:07:ee   \n",
       "2588703                                       000000000000  00:c1:b1:14:eb:31   \n",
       "2588704  d7be298a6488d285a1b4cc1bdc0cb1215d68957349ca13...  b8:ac:6f:36:07:ee   \n",
       "2588705                                       000000000000  00:c1:b1:14:eb:31   \n",
       "2588706                                       000000000000  00:c1:b1:14:eb:31   \n",
       "\n",
       "         ... PPTP data PPTP Incoming Call Connected len  \\\n",
       "0        ...       NaN                              NaN   \n",
       "1        ...       NaN                              NaN   \n",
       "2        ...       NaN                              NaN   \n",
       "3        ...       NaN                              NaN   \n",
       "4        ...       NaN                              NaN   \n",
       "...      ...       ...                              ...   \n",
       "2588702  ...       NaN                              NaN   \n",
       "2588703  ...       NaN                              NaN   \n",
       "2588704  ...       NaN                              NaN   \n",
       "2588705  ...       NaN                              NaN   \n",
       "2588706  ...       NaN                              NaN   \n",
       "\n",
       "         PPTP Incoming Call Connected type  PPTP Outgoing Call Reply len  \\\n",
       "0                                      NaN                           NaN   \n",
       "1                                      NaN                           NaN   \n",
       "2                                      NaN                           NaN   \n",
       "3                                      NaN                           NaN   \n",
       "4                                      NaN                           NaN   \n",
       "...                                    ...                           ...   \n",
       "2588702                                NaN                           NaN   \n",
       "2588703                                NaN                           NaN   \n",
       "2588704                                NaN                           NaN   \n",
       "2588705                                NaN                           NaN   \n",
       "2588706                                NaN                           NaN   \n",
       "\n",
       "        PPTP Outgoing Call Reply type  PPTP Outgoing Call Reply call_id  \\\n",
       "0                                 NaN                               NaN   \n",
       "1                                 NaN                               NaN   \n",
       "2                                 NaN                               NaN   \n",
       "3                                 NaN                               NaN   \n",
       "4                                 NaN                               NaN   \n",
       "...                               ...                               ...   \n",
       "2588702                           NaN                               NaN   \n",
       "2588703                           NaN                               NaN   \n",
       "2588704                           NaN                               NaN   \n",
       "2588705                           NaN                               NaN   \n",
       "2588706                           NaN                               NaN   \n",
       "\n",
       "         Skinny len Skinny res  Skinny msg  attack_label  \n",
       "0               NaN        NaN         NaN        BENIGN  \n",
       "1               NaN        NaN         NaN        BENIGN  \n",
       "2               NaN        NaN         NaN        BENIGN  \n",
       "3               NaN        NaN         NaN        BENIGN  \n",
       "4               NaN        NaN         NaN        BENIGN  \n",
       "...             ...        ...         ...           ...  \n",
       "2588702         NaN        NaN         NaN        BENIGN  \n",
       "2588703         NaN        NaN         NaN        BENIGN  \n",
       "2588704         NaN        NaN         NaN        BENIGN  \n",
       "2588705         NaN        NaN         NaN        BENIGN  \n",
       "2588706         NaN        NaN         NaN        BENIGN  \n",
       "\n",
       "[2588707 rows x 211 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ca722f1c-9617-4fc2-b356-c02d2cd556eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BENIGN                        100000\n",
       "DDoS                          100000\n",
       "DoS Hulk                      100000\n",
       "PortScan                      100000\n",
       "DoS GoldenEye                  10293\n",
       "FTP-Patator                     7935\n",
       "SSH-Patator                     5897\n",
       "DoS slowloris                   5796\n",
       "DoS Slowhttptest                5499\n",
       "Bot                             1956\n",
       "Web Attack – Brute Force        1507\n",
       "Web Attack – XSS                 652\n",
       "Infiltration                      36\n",
       "Web Attack – Sql Injection        21\n",
       "Heartbleed                        11\n",
       "Name: attack_label, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1.attack_label.value_counts(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "dcc1a972-ece3-44b4-9d1d-657fc441c196",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow1 = flow1.groupby('attack_label').apply(lambda x: x.sample(min(len(x), 100000))).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ce9e7dd2-0fa0-404a-9e6e-b55402249e99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flow_id</th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Flow Duration</th>\n",
       "      <th>Total Fwd Packets</th>\n",
       "      <th>Total Backward Packets</th>\n",
       "      <th>...</th>\n",
       "      <th>min_seg_size_forward</th>\n",
       "      <th>Active Mean</th>\n",
       "      <th>Active Std</th>\n",
       "      <th>Active Max</th>\n",
       "      <th>Active Min</th>\n",
       "      <th>Idle Mean</th>\n",
       "      <th>Idle Std</th>\n",
       "      <th>Idle Max</th>\n",
       "      <th>Idle Min</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1421252</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>60311.0</td>\n",
       "      <td>192.168.10.1</td>\n",
       "      <td>53.0</td>\n",
       "      <td>udp</td>\n",
       "      <td>5/7/2017 2:44</td>\n",
       "      <td>48190.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1843601</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>35284.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>5555.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 3:28</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1520000</td>\n",
       "      <td>172.217.12.174</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>22976.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>5/7/2017 3:56</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2252114</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>3131.0</td>\n",
       "      <td>52.39.237.157</td>\n",
       "      <td>443.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>7/7/2017 11:06</td>\n",
       "      <td>63094225.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>568211.8333</td>\n",
       "      <td>1182762.664</td>\n",
       "      <td>2982516.0</td>\n",
       "      <td>85189.0</td>\n",
       "      <td>9947454.167</td>\n",
       "      <td>82386.26616</td>\n",
       "      <td>9995414.0</td>\n",
       "      <td>9794892.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1928644</td>\n",
       "      <td>192.168.10.8</td>\n",
       "      <td>1915.0</td>\n",
       "      <td>35.156.199.43</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 4:35</td>\n",
       "      <td>103.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439598</th>\n",
       "      <td>2030906</td>\n",
       "      <td>172.16.0.1</td>\n",
       "      <td>55942.0</td>\n",
       "      <td>192.168.10.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 10:22</td>\n",
       "      <td>5993237.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Web Attack – XSS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439599</th>\n",
       "      <td>2035394</td>\n",
       "      <td>172.16.0.1</td>\n",
       "      <td>59772.0</td>\n",
       "      <td>192.168.10.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 10:27</td>\n",
       "      <td>5334912.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Web Attack – XSS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439600</th>\n",
       "      <td>2030496</td>\n",
       "      <td>172.16.0.1</td>\n",
       "      <td>55444.0</td>\n",
       "      <td>192.168.10.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 10:21</td>\n",
       "      <td>5952394.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Web Attack – XSS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439601</th>\n",
       "      <td>2029813</td>\n",
       "      <td>172.16.0.1</td>\n",
       "      <td>54268.0</td>\n",
       "      <td>192.168.10.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 10:20</td>\n",
       "      <td>345.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Web Attack – XSS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439602</th>\n",
       "      <td>2029711</td>\n",
       "      <td>172.16.0.1</td>\n",
       "      <td>54808.0</td>\n",
       "      <td>192.168.10.50</td>\n",
       "      <td>80.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6/7/2017 10:20</td>\n",
       "      <td>5586384.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Web Attack – XSS</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>439603 rows × 84 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        flow_id       source_ip  source_port destination_ip  destination_port  \\\n",
       "0       1421252    192.168.10.3      60311.0   192.168.10.1              53.0   \n",
       "1       1843601    192.168.10.8      35284.0  192.168.10.14            5555.0   \n",
       "2       1520000  172.217.12.174         80.0   192.168.10.9           22976.0   \n",
       "3       2252114    192.168.10.8       3131.0  52.39.237.157             443.0   \n",
       "4       1928644    192.168.10.8       1915.0  35.156.199.43              80.0   \n",
       "...         ...             ...          ...            ...               ...   \n",
       "439598  2030906      172.16.0.1      55942.0  192.168.10.50              80.0   \n",
       "439599  2035394      172.16.0.1      59772.0  192.168.10.50              80.0   \n",
       "439600  2030496      172.16.0.1      55444.0  192.168.10.50              80.0   \n",
       "439601  2029813      172.16.0.1      54268.0  192.168.10.50              80.0   \n",
       "439602  2029711      172.16.0.1      54808.0  192.168.10.50              80.0   \n",
       "\n",
       "       protocol       Timestamp  Flow Duration  Total Fwd Packets  \\\n",
       "0           udp   5/7/2017 2:44        48190.0                1.0   \n",
       "1           tcp   6/7/2017 3:28            3.0                2.0   \n",
       "2           tcp   5/7/2017 3:56           51.0                1.0   \n",
       "3           tcp  7/7/2017 11:06     63094225.0               17.0   \n",
       "4           tcp   6/7/2017 4:35          103.0                2.0   \n",
       "...         ...             ...            ...                ...   \n",
       "439598      tcp  6/7/2017 10:22      5993237.0                3.0   \n",
       "439599      tcp  6/7/2017 10:27      5334912.0                3.0   \n",
       "439600      tcp  6/7/2017 10:21      5952394.0                3.0   \n",
       "439601      tcp  6/7/2017 10:20          345.0                2.0   \n",
       "439602      tcp  6/7/2017 10:20      5586384.0                3.0   \n",
       "\n",
       "        Total Backward Packets  ...  min_seg_size_forward  Active Mean  \\\n",
       "0                          1.0  ...                  32.0       0.0000   \n",
       "1                          0.0  ...                  24.0       0.0000   \n",
       "2                          1.0  ...                  20.0       0.0000   \n",
       "3                         17.0  ...                  20.0  568211.8333   \n",
       "4                          0.0  ...                  20.0       0.0000   \n",
       "...                        ...  ...                   ...          ...   \n",
       "439598                     1.0  ...                  32.0       0.0000   \n",
       "439599                     1.0  ...                  32.0       0.0000   \n",
       "439600                     1.0  ...                  32.0       0.0000   \n",
       "439601                     0.0  ...                  32.0       0.0000   \n",
       "439602                     1.0  ...                  32.0       0.0000   \n",
       "\n",
       "         Active Std  Active Max  Active Min    Idle Mean     Idle Std  \\\n",
       "0             0.000         0.0         0.0        0.000      0.00000   \n",
       "1             0.000         0.0         0.0        0.000      0.00000   \n",
       "2             0.000         0.0         0.0        0.000      0.00000   \n",
       "3       1182762.664   2982516.0     85189.0  9947454.167  82386.26616   \n",
       "4             0.000         0.0         0.0        0.000      0.00000   \n",
       "...             ...         ...         ...          ...          ...   \n",
       "439598        0.000         0.0         0.0        0.000      0.00000   \n",
       "439599        0.000         0.0         0.0        0.000      0.00000   \n",
       "439600        0.000         0.0         0.0        0.000      0.00000   \n",
       "439601        0.000         0.0         0.0        0.000      0.00000   \n",
       "439602        0.000         0.0         0.0        0.000      0.00000   \n",
       "\n",
       "         Idle Max   Idle Min      attack_label  \n",
       "0             0.0        0.0            BENIGN  \n",
       "1             0.0        0.0            BENIGN  \n",
       "2             0.0        0.0            BENIGN  \n",
       "3       9995414.0  9794892.0            BENIGN  \n",
       "4             0.0        0.0            BENIGN  \n",
       "...           ...        ...               ...  \n",
       "439598        0.0        0.0  Web Attack – XSS  \n",
       "439599        0.0        0.0  Web Attack – XSS  \n",
       "439600        0.0        0.0  Web Attack – XSS  \n",
       "439601        0.0        0.0  Web Attack – XSS  \n",
       "439602        0.0        0.0  Web Attack – XSS  \n",
       "\n",
       "[439603 rows x 84 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "52d50ae0-e555-4882-8021-ee01f38157fd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa35bfc2b80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8dadc0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8dadc0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa36a7979d0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa36a7979d0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8daf70>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa36a7979d0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8da1f0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8da1f0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8da280>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a9bd310>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a8dadc0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa366d44b80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40cdfb430>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40cdfb4c0>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40cdfb430>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40ab25670>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n",
      "Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fa40a9bd310>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 400, in match_module_callback\n",
      "    self._make_module_from_path(filepath)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 515, in _make_module_from_path\n",
      "    module = module_class(filepath, prefix, user_api, internal_api)\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 606, in __init__\n",
      "    self.version = self.get_version()\n",
      "  File \"/opt/jupyter-hub/lib/python3.9/site-packages/threadpoolctl.py\", line 646, in get_version\n",
      "    config = get_config().split()\n",
      "AttributeError: 'NoneType' object has no attribute 'split'\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_03a5a_row10_col1, #T_03a5a_row16_col1, #T_03a5a_row18_col1, #T_03a5a_row20_col1, #T_03a5a_row27_col1 {\n",
       "  background-color: lightgreen;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_03a5a\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_03a5a_level0_col0\" class=\"col_heading level0 col0\" >Description</th>\n",
       "      <th id=\"T_03a5a_level0_col1\" class=\"col_heading level0 col1\" >Value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
       "      <td id=\"T_03a5a_row0_col0\" class=\"data row0 col0\" >Session id</td>\n",
       "      <td id=\"T_03a5a_row0_col1\" class=\"data row0 col1\" >123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
       "      <td id=\"T_03a5a_row1_col0\" class=\"data row1 col0\" >Target</td>\n",
       "      <td id=\"T_03a5a_row1_col1\" class=\"data row1 col1\" >attack_label</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
       "      <td id=\"T_03a5a_row2_col0\" class=\"data row2 col0\" >Target type</td>\n",
       "      <td id=\"T_03a5a_row2_col1\" class=\"data row2 col1\" >Multiclass</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
       "      <td id=\"T_03a5a_row3_col0\" class=\"data row3 col0\" >Target mapping</td>\n",
       "      <td id=\"T_03a5a_row3_col1\" class=\"data row3 col1\" >BENIGN: 0, Bot: 1, DDoS: 2, DoS GoldenEye: 3, DoS Hulk: 4, DoS Slowhttptest: 5, DoS slowloris: 6, FTP-Patator: 7, Heartbleed: 8, Infiltration: 9, PortScan: 10, SSH-Patator: 11, Web Attack – Brute Force: 12, Web Attack – Sql Injection: 13, Web Attack – XSS: 14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
       "      <td id=\"T_03a5a_row4_col0\" class=\"data row4 col0\" >Original data shape</td>\n",
       "      <td id=\"T_03a5a_row4_col1\" class=\"data row4 col1\" >(439603, 84)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
       "      <td id=\"T_03a5a_row5_col0\" class=\"data row5 col0\" >Transformed data shape</td>\n",
       "      <td id=\"T_03a5a_row5_col1\" class=\"data row5 col1\" >(1181881, 17)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
       "      <td id=\"T_03a5a_row6_col0\" class=\"data row6 col0\" >Transformed train set shape</td>\n",
       "      <td id=\"T_03a5a_row6_col1\" class=\"data row6 col1\" >(1050000, 17)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
       "      <td id=\"T_03a5a_row7_col0\" class=\"data row7 col0\" >Transformed test set shape</td>\n",
       "      <td id=\"T_03a5a_row7_col1\" class=\"data row7 col1\" >(131881, 17)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
       "      <td id=\"T_03a5a_row8_col0\" class=\"data row8 col0\" >Numeric features</td>\n",
       "      <td id=\"T_03a5a_row8_col1\" class=\"data row8 col1\" >79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
       "      <td id=\"T_03a5a_row9_col0\" class=\"data row9 col0\" >Categorical features</td>\n",
       "      <td id=\"T_03a5a_row9_col1\" class=\"data row9 col1\" >4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row10\" class=\"row_heading level0 row10\" >10</th>\n",
       "      <td id=\"T_03a5a_row10_col0\" class=\"data row10 col0\" >Preprocess</td>\n",
       "      <td id=\"T_03a5a_row10_col1\" class=\"data row10 col1\" >True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row11\" class=\"row_heading level0 row11\" >11</th>\n",
       "      <td id=\"T_03a5a_row11_col0\" class=\"data row11 col0\" >Imputation type</td>\n",
       "      <td id=\"T_03a5a_row11_col1\" class=\"data row11 col1\" >simple</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row12\" class=\"row_heading level0 row12\" >12</th>\n",
       "      <td id=\"T_03a5a_row12_col0\" class=\"data row12 col0\" >Numeric imputation</td>\n",
       "      <td id=\"T_03a5a_row12_col1\" class=\"data row12 col1\" >mean</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row13\" class=\"row_heading level0 row13\" >13</th>\n",
       "      <td id=\"T_03a5a_row13_col0\" class=\"data row13 col0\" >Categorical imputation</td>\n",
       "      <td id=\"T_03a5a_row13_col1\" class=\"data row13 col1\" >mode</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row14\" class=\"row_heading level0 row14\" >14</th>\n",
       "      <td id=\"T_03a5a_row14_col0\" class=\"data row14 col0\" >Maximum one-hot encoding</td>\n",
       "      <td id=\"T_03a5a_row14_col1\" class=\"data row14 col1\" >25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row15\" class=\"row_heading level0 row15\" >15</th>\n",
       "      <td id=\"T_03a5a_row15_col0\" class=\"data row15 col0\" >Encoding method</td>\n",
       "      <td id=\"T_03a5a_row15_col1\" class=\"data row15 col1\" >None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row16\" class=\"row_heading level0 row16\" >16</th>\n",
       "      <td id=\"T_03a5a_row16_col0\" class=\"data row16 col0\" >Fix imbalance</td>\n",
       "      <td id=\"T_03a5a_row16_col1\" class=\"data row16 col1\" >True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row17\" class=\"row_heading level0 row17\" >17</th>\n",
       "      <td id=\"T_03a5a_row17_col0\" class=\"data row17 col0\" >Fix imbalance method</td>\n",
       "      <td id=\"T_03a5a_row17_col1\" class=\"data row17 col1\" >SMOTE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row18\" class=\"row_heading level0 row18\" >18</th>\n",
       "      <td id=\"T_03a5a_row18_col0\" class=\"data row18 col0\" >Normalize</td>\n",
       "      <td id=\"T_03a5a_row18_col1\" class=\"data row18 col1\" >True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row19\" class=\"row_heading level0 row19\" >19</th>\n",
       "      <td id=\"T_03a5a_row19_col0\" class=\"data row19 col0\" >Normalize method</td>\n",
       "      <td id=\"T_03a5a_row19_col1\" class=\"data row19 col1\" >zscore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row20\" class=\"row_heading level0 row20\" >20</th>\n",
       "      <td id=\"T_03a5a_row20_col0\" class=\"data row20 col0\" >Feature selection</td>\n",
       "      <td id=\"T_03a5a_row20_col1\" class=\"data row20 col1\" >True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row21\" class=\"row_heading level0 row21\" >21</th>\n",
       "      <td id=\"T_03a5a_row21_col0\" class=\"data row21 col0\" >Feature selection method</td>\n",
       "      <td id=\"T_03a5a_row21_col1\" class=\"data row21 col1\" >classic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row22\" class=\"row_heading level0 row22\" >22</th>\n",
       "      <td id=\"T_03a5a_row22_col0\" class=\"data row22 col0\" >Feature selection estimator</td>\n",
       "      <td id=\"T_03a5a_row22_col1\" class=\"data row22 col1\" >lightgbm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row23\" class=\"row_heading level0 row23\" >23</th>\n",
       "      <td id=\"T_03a5a_row23_col0\" class=\"data row23 col0\" >Number of features selected</td>\n",
       "      <td id=\"T_03a5a_row23_col1\" class=\"data row23 col1\" >0.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row24\" class=\"row_heading level0 row24\" >24</th>\n",
       "      <td id=\"T_03a5a_row24_col0\" class=\"data row24 col0\" >Fold Generator</td>\n",
       "      <td id=\"T_03a5a_row24_col1\" class=\"data row24 col1\" >StratifiedKFold</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row25\" class=\"row_heading level0 row25\" >25</th>\n",
       "      <td id=\"T_03a5a_row25_col0\" class=\"data row25 col0\" >Fold Number</td>\n",
       "      <td id=\"T_03a5a_row25_col1\" class=\"data row25 col1\" >10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row26\" class=\"row_heading level0 row26\" >26</th>\n",
       "      <td id=\"T_03a5a_row26_col0\" class=\"data row26 col0\" >CPU Jobs</td>\n",
       "      <td id=\"T_03a5a_row26_col1\" class=\"data row26 col1\" >-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row27\" class=\"row_heading level0 row27\" >27</th>\n",
       "      <td id=\"T_03a5a_row27_col0\" class=\"data row27 col0\" >Use GPU</td>\n",
       "      <td id=\"T_03a5a_row27_col1\" class=\"data row27 col1\" >True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row28\" class=\"row_heading level0 row28\" >28</th>\n",
       "      <td id=\"T_03a5a_row28_col0\" class=\"data row28 col0\" >Log Experiment</td>\n",
       "      <td id=\"T_03a5a_row28_col1\" class=\"data row28 col1\" >False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row29\" class=\"row_heading level0 row29\" >29</th>\n",
       "      <td id=\"T_03a5a_row29_col0\" class=\"data row29 col0\" >Experiment Name</td>\n",
       "      <td id=\"T_03a5a_row29_col1\" class=\"data row29 col1\" >clf-default-name</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_03a5a_level0_row30\" class=\"row_heading level0 row30\" >30</th>\n",
       "      <td id=\"T_03a5a_row30_col0\" class=\"data row30 col0\" >USI</td>\n",
       "      <td id=\"T_03a5a_row30_col1\" class=\"data row30 col1\" >fada</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7fa40a882eb0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n",
      "[LightGBM] [Fatal] GPU Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_GPU=1\n",
      "[LightGBM] [Fatal] CUDA Tree Learner was not enabled in this build.\n",
      "Please recompile with CMake option -DUSE_CUDA=1\n"
     ]
    }
   ],
   "source": [
    "# Pull the PyCaret classification API (setup, create_model, ...) into the notebook namespace.\n",
    "from pycaret.classification import *\n",
    "\n",
    "# Initialise the experiment on `flow1`, predicting the `attack_label` column.\n",
    "# NOTE(review): the saved stderr reports a LightGBM build without GPU/CUDA support;\n",
    "# confirm that use_gpu=True has the intended effect in this environment.\n",
    "s = setup(\n",
    "    flow1,\n",
    "    target='attack_label',\n",
    "    normalize=True,\n",
    "    fix_imbalance=True,\n",
    "    use_gpu=True,\n",
    "    feature_selection=True,\n",
    "    session_id=123,  # fixed seed so the run is repeatable\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "63e64bf3-4864-41b6-bb88-4b19ad512838",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_73c8b_row10_col0, #T_73c8b_row10_col1, #T_73c8b_row10_col2, #T_73c8b_row10_col3, #T_73c8b_row10_col4, #T_73c8b_row10_col5, #T_73c8b_row10_col6 {\n",
       "  background: yellow;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_73c8b\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_73c8b_level0_col0\" class=\"col_heading level0 col0\" >Accuracy</th>\n",
       "      <th id=\"T_73c8b_level0_col1\" class=\"col_heading level0 col1\" >AUC</th>\n",
       "      <th id=\"T_73c8b_level0_col2\" class=\"col_heading level0 col2\" >Recall</th>\n",
       "      <th id=\"T_73c8b_level0_col3\" class=\"col_heading level0 col3\" >Prec.</th>\n",
       "      <th id=\"T_73c8b_level0_col4\" class=\"col_heading level0 col4\" >F1</th>\n",
       "      <th id=\"T_73c8b_level0_col5\" class=\"col_heading level0 col5\" >Kappa</th>\n",
       "      <th id=\"T_73c8b_level0_col6\" class=\"col_heading level0 col6\" >MCC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th class=\"index_name level0\" >Fold</th>\n",
       "      <th class=\"blank col0\" >&nbsp;</th>\n",
       "      <th class=\"blank col1\" >&nbsp;</th>\n",
       "      <th class=\"blank col2\" >&nbsp;</th>\n",
       "      <th class=\"blank col3\" >&nbsp;</th>\n",
       "      <th class=\"blank col4\" >&nbsp;</th>\n",
       "      <th class=\"blank col5\" >&nbsp;</th>\n",
       "      <th class=\"blank col6\" >&nbsp;</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
       "      <td id=\"T_73c8b_row0_col0\" class=\"data row0 col0\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col1\" class=\"data row0 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col2\" class=\"data row0 col2\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col3\" class=\"data row0 col3\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col4\" class=\"data row0 col4\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col5\" class=\"data row0 col5\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row0_col6\" class=\"data row0 col6\" >1.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
       "      <td id=\"T_73c8b_row1_col0\" class=\"data row1 col0\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col1\" class=\"data row1 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col2\" class=\"data row1 col2\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col3\" class=\"data row1 col3\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col4\" class=\"data row1 col4\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col5\" class=\"data row1 col5\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row1_col6\" class=\"data row1 col6\" >1.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
       "      <td id=\"T_73c8b_row2_col0\" class=\"data row2 col0\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row2_col1\" class=\"data row2 col1\" >0.0000</td>\n",
       "      <td id=\"T_73c8b_row2_col2\" class=\"data row2 col2\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row2_col3\" class=\"data row2 col3\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row2_col4\" class=\"data row2 col4\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row2_col5\" class=\"data row2 col5\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row2_col6\" class=\"data row2 col6\" >0.9998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
       "      <td id=\"T_73c8b_row3_col0\" class=\"data row3 col0\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col1\" class=\"data row3 col1\" >0.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col2\" class=\"data row3 col2\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col3\" class=\"data row3 col3\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col4\" class=\"data row3 col4\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col5\" class=\"data row3 col5\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row3_col6\" class=\"data row3 col6\" >1.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
       "      <td id=\"T_73c8b_row4_col0\" class=\"data row4 col0\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col1\" class=\"data row4 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col2\" class=\"data row4 col2\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col3\" class=\"data row4 col3\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col4\" class=\"data row4 col4\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col5\" class=\"data row4 col5\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row4_col6\" class=\"data row4 col6\" >1.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
       "      <td id=\"T_73c8b_row5_col0\" class=\"data row5 col0\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row5_col1\" class=\"data row5 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row5_col2\" class=\"data row5 col2\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row5_col3\" class=\"data row5 col3\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row5_col4\" class=\"data row5 col4\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row5_col5\" class=\"data row5 col5\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row5_col6\" class=\"data row5 col6\" >0.9999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
       "      <td id=\"T_73c8b_row6_col0\" class=\"data row6 col0\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row6_col1\" class=\"data row6 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row6_col2\" class=\"data row6 col2\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row6_col3\" class=\"data row6 col3\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row6_col4\" class=\"data row6 col4\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row6_col5\" class=\"data row6 col5\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row6_col6\" class=\"data row6 col6\" >0.9998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
       "      <td id=\"T_73c8b_row7_col0\" class=\"data row7 col0\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row7_col1\" class=\"data row7 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row7_col2\" class=\"data row7 col2\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row7_col3\" class=\"data row7 col3\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row7_col4\" class=\"data row7 col4\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row7_col5\" class=\"data row7 col5\" >0.9998</td>\n",
       "      <td id=\"T_73c8b_row7_col6\" class=\"data row7 col6\" >0.9998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
       "      <td id=\"T_73c8b_row8_col0\" class=\"data row8 col0\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row8_col1\" class=\"data row8 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row8_col2\" class=\"data row8 col2\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row8_col3\" class=\"data row8 col3\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row8_col4\" class=\"data row8 col4\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row8_col5\" class=\"data row8 col5\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row8_col6\" class=\"data row8 col6\" >0.9999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
       "      <td id=\"T_73c8b_row9_col0\" class=\"data row9 col0\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col1\" class=\"data row9 col1\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col2\" class=\"data row9 col2\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col3\" class=\"data row9 col3\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col4\" class=\"data row9 col4\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col5\" class=\"data row9 col5\" >1.0000</td>\n",
       "      <td id=\"T_73c8b_row9_col6\" class=\"data row9 col6\" >1.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row10\" class=\"row_heading level0 row10\" >Mean</th>\n",
       "      <td id=\"T_73c8b_row10_col0\" class=\"data row10 col0\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row10_col1\" class=\"data row10 col1\" >0.8000</td>\n",
       "      <td id=\"T_73c8b_row10_col2\" class=\"data row10 col2\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row10_col3\" class=\"data row10 col3\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row10_col4\" class=\"data row10 col4\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row10_col5\" class=\"data row10 col5\" >0.9999</td>\n",
       "      <td id=\"T_73c8b_row10_col6\" class=\"data row10 col6\" >0.9999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_73c8b_level0_row11\" class=\"row_heading level0 row11\" >Std</th>\n",
       "      <td id=\"T_73c8b_row11_col0\" class=\"data row11 col0\" >0.0001</td>\n",
       "      <td id=\"T_73c8b_row11_col1\" class=\"data row11 col1\" >0.4000</td>\n",
       "      <td id=\"T_73c8b_row11_col2\" class=\"data row11 col2\" >0.0001</td>\n",
       "      <td id=\"T_73c8b_row11_col3\" class=\"data row11 col3\" >0.0001</td>\n",
       "      <td id=\"T_73c8b_row11_col4\" class=\"data row11 col4\" >0.0001</td>\n",
       "      <td id=\"T_73c8b_row11_col5\" class=\"data row11 col5\" >0.0001</td>\n",
       "      <td id=\"T_73c8b_row11_col6\" class=\"data row11 col6\" >0.0001</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7fa408557340>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "rf = create_model('rf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "c54f399c-6c8d-44b5-a8ec-85dd8f62838f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2412b449950c45b2859d7f38a2e12b48",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "evaluate_model(rf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "aa995388-5593-4553-b2e1-6b4bdabae8dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['flow_id', 'source_ip', 'source_port', 'destination_ip',\n",
       "       'destination_port', 'protocol', 'Timestamp', 'Flow Duration',\n",
       "       'Total Fwd Packets', 'Total Backward Packets',\n",
       "       'Total Length of Fwd Packets', 'Total Length of Bwd Packets',\n",
       "       'Fwd Packet Length Max', 'Fwd Packet Length Min',\n",
       "       'Fwd Packet Length Mean', 'Fwd Packet Length Std',\n",
       "       'Bwd Packet Length Max', 'Bwd Packet Length Min',\n",
       "       'Bwd Packet Length Mean', 'Bwd Packet Length Std', 'Flow Bytes/s',\n",
       "       'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max',\n",
       "       'Flow IAT Min', 'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std',\n",
       "       'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean',\n",
       "       'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags',\n",
       "       'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',\n",
       "       'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',\n",
       "       'Min Packet Length', 'Max Packet Length', 'Packet Length Mean',\n",
       "       'Packet Length Std', 'Packet Length Variance', 'FIN Flag Count',\n",
       "       'SYN Flag Count', 'RST Flag Count', 'PSH Flag Count', 'ACK Flag Count',\n",
       "       'URG Flag Count', 'CWE Flag Count', 'ECE Flag Count', 'Down/Up Ratio',\n",
       "       'Average Packet Size', 'Avg Fwd Segment Size', 'Avg Bwd Segment Size',\n",
       "       'Fwd Avg Bytes/Bulk', 'Fwd Avg Packets/Bulk', 'Fwd Avg Bulk Rate',\n",
       "       'Bwd Avg Bytes/Bulk', 'Bwd Avg Packets/Bulk', 'Bwd Avg Bulk Rate',\n",
       "       'Subflow Fwd Packets', 'Subflow Fwd Bytes', 'Subflow Bwd Packets',\n",
       "       'Subflow Bwd Bytes', 'Init_Win_bytes_forward',\n",
       "       'Init_Win_bytes_backward', 'act_data_pkt_fwd', 'min_seg_size_forward',\n",
       "       'Active Mean', 'Active Std', 'Active Max', 'Active Min', 'Idle Mean',\n",
       "       'Idle Std', 'Idle Max', 'Idle Min', 'attack_label'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eb35cca3-d9ad-4d33-ad82-0837a9aa252b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the endpoint/protocol columns, the three rate features and the label.\n",
    "flow1 = flow1.loc[:, [\n",
    "    'source_ip', 'source_port', 'destination_ip', 'destination_port', 'protocol',\n",
    "    'Fwd Packets/s', 'Bwd Packets/s', 'Flow Bytes/s',\n",
    "    'attack_label',\n",
    "]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6fbe4a6e-5848-4146-aa11-275d7c702d7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Fwd Packets/s</th>\n",
       "      <th>Bwd Packets/s</th>\n",
       "      <th>Flow Bytes/s</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>5.000000e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.000000e+06</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.254.250.126</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.5</td>\n",
       "      <td>49188.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>2.000000e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.200000e+07</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>6.666667e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.000000e+06</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8.253.185.121</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.14</td>\n",
       "      <td>49486.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>2.000000e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.200000e+07</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>192.168.10.9</td>\n",
       "      <td>1031.0</td>\n",
       "      <td>192.168.10.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>1.149425e+04</td>\n",
       "      <td>6568.144499</td>\n",
       "      <td>1.474548e+06</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827672</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61374.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>1.639344e+04</td>\n",
       "      <td>16393.442620</td>\n",
       "      <td>1.967213e+05</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827673</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61378.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>1.388889e+04</td>\n",
       "      <td>13888.888890</td>\n",
       "      <td>1.666667e+05</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827674</th>\n",
       "      <td>72.21.91.29</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61375.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>1.333333e+04</td>\n",
       "      <td>13333.333330</td>\n",
       "      <td>1.600000e+05</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827675</th>\n",
       "      <td>8.41.222.187</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61323.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>4.166667e+04</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.500000e+05</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2827676</th>\n",
       "      <td>8.43.72.21</td>\n",
       "      <td>80.0</td>\n",
       "      <td>192.168.10.15</td>\n",
       "      <td>61326.0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>1.470588e+04</td>\n",
       "      <td>14705.882350</td>\n",
       "      <td>1.764706e+05</td>\n",
       "      <td>BENIGN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2827677 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             source_ip  source_port destination_ip  destination_port protocol  \\\n",
       "0        8.254.250.126         80.0   192.168.10.5           49188.0      tcp   \n",
       "1        8.254.250.126         80.0   192.168.10.5           49188.0      tcp   \n",
       "2        8.253.185.121         80.0  192.168.10.14           49486.0      tcp   \n",
       "3        8.253.185.121         80.0  192.168.10.14           49486.0      tcp   \n",
       "4         192.168.10.9       1031.0   192.168.10.3              88.0      tcp   \n",
       "...                ...          ...            ...               ...      ...   \n",
       "2827672    72.21.91.29         80.0  192.168.10.15           61374.0      tcp   \n",
       "2827673    72.21.91.29         80.0  192.168.10.15           61378.0      tcp   \n",
       "2827674    72.21.91.29         80.0  192.168.10.15           61375.0      tcp   \n",
       "2827675   8.41.222.187         80.0  192.168.10.15           61323.0      tcp   \n",
       "2827676     8.43.72.21         80.0  192.168.10.15           61326.0      tcp   \n",
       "\n",
       "         Fwd Packets/s  Bwd Packets/s  Flow Bytes/s attack_label  \n",
       "0         5.000000e+05       0.000000  3.000000e+06       BENIGN  \n",
       "1         2.000000e+06       0.000000  1.200000e+07       BENIGN  \n",
       "2         6.666667e+05       0.000000  4.000000e+06       BENIGN  \n",
       "3         2.000000e+06       0.000000  1.200000e+07       BENIGN  \n",
       "4         1.149425e+04    6568.144499  1.474548e+06       BENIGN  \n",
       "...                ...            ...           ...          ...  \n",
       "2827672   1.639344e+04   16393.442620  1.967213e+05       BENIGN  \n",
       "2827673   1.388889e+04   13888.888890  1.666667e+05       BENIGN  \n",
       "2827674   1.333333e+04   13333.333330  1.600000e+05       BENIGN  \n",
       "2827675   4.166667e+04       0.000000  2.500000e+05       BENIGN  \n",
       "2827676   1.470588e+04   14705.882350  1.764706e+05       BENIGN  \n",
       "\n",
       "[2827677 rows x 9 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "60fc5cf3-69e7-4998-9a21-2d47719e7e6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow2 = pd.read_csv('./unsw_flow1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "22a6ddfa-03aa-46d5-b5b1-147a3ab59dfe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>dur</th>\n",
       "      <th>sbytes</th>\n",
       "      <th>dbytes</th>\n",
       "      <th>sload</th>\n",
       "      <th>dload</th>\n",
       "      <th>spkts</th>\n",
       "      <th>dpkts</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59.166.0.0</td>\n",
       "      <td>1390</td>\n",
       "      <td>149.171.126.6</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>0.001055</td>\n",
       "      <td>132</td>\n",
       "      <td>164</td>\n",
       "      <td>500473.937500</td>\n",
       "      <td>6.218009e+05</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>59.166.0.0</td>\n",
       "      <td>33661</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>1024</td>\n",
       "      <td>udp</td>\n",
       "      <td>0.036133</td>\n",
       "      <td>528</td>\n",
       "      <td>304</td>\n",
       "      <td>87676.085940</td>\n",
       "      <td>5.048017e+04</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>59.166.0.6</td>\n",
       "      <td>1464</td>\n",
       "      <td>149.171.126.7</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>0.001119</td>\n",
       "      <td>146</td>\n",
       "      <td>178</td>\n",
       "      <td>521894.531300</td>\n",
       "      <td>6.362824e+05</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>59.166.0.5</td>\n",
       "      <td>3593</td>\n",
       "      <td>149.171.126.5</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>0.001209</td>\n",
       "      <td>132</td>\n",
       "      <td>164</td>\n",
       "      <td>436724.562500</td>\n",
       "      <td>5.425972e+05</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59.166.0.3</td>\n",
       "      <td>49664</td>\n",
       "      <td>149.171.126.0</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>0.001169</td>\n",
       "      <td>146</td>\n",
       "      <td>178</td>\n",
       "      <td>499572.250000</td>\n",
       "      <td>6.090676e+05</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059410</th>\n",
       "      <td>59.166.0.1</td>\n",
       "      <td>38606</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>0.564998</td>\n",
       "      <td>14106</td>\n",
       "      <td>772406</td>\n",
       "      <td>198981.250000</td>\n",
       "      <td>1.091598e+07</td>\n",
       "      <td>262</td>\n",
       "      <td>526</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059411</th>\n",
       "      <td>59.166.0.1</td>\n",
       "      <td>38606</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>0.564998</td>\n",
       "      <td>14106</td>\n",
       "      <td>772406</td>\n",
       "      <td>198981.250000</td>\n",
       "      <td>1.091598e+07</td>\n",
       "      <td>262</td>\n",
       "      <td>526</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059412</th>\n",
       "      <td>59.166.0.5</td>\n",
       "      <td>33094</td>\n",
       "      <td>149.171.126.7</td>\n",
       "      <td>43433</td>\n",
       "      <td>tcp</td>\n",
       "      <td>0.087306</td>\n",
       "      <td>320</td>\n",
       "      <td>1828</td>\n",
       "      <td>24465.671880</td>\n",
       "      <td>1.466108e+05</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059413</th>\n",
       "      <td>59.166.0.9</td>\n",
       "      <td>35433</td>\n",
       "      <td>149.171.126.0</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>2.200934</td>\n",
       "      <td>3498</td>\n",
       "      <td>166054</td>\n",
       "      <td>12496.513670</td>\n",
       "      <td>5.983751e+05</td>\n",
       "      <td>58</td>\n",
       "      <td>116</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059414</th>\n",
       "      <td>175.45.176.0</td>\n",
       "      <td>17293</td>\n",
       "      <td>149.171.126.17</td>\n",
       "      <td>110</td>\n",
       "      <td>tcp</td>\n",
       "      <td>0.942984</td>\n",
       "      <td>574</td>\n",
       "      <td>676</td>\n",
       "      <td>4470.913574</td>\n",
       "      <td>5.259898e+03</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>exploits</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2059415 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            source_ip  source_port  destination_ip  destination_port protocol  \\\n",
       "0          59.166.0.0         1390   149.171.126.6                53      udp   \n",
       "1          59.166.0.0        33661   149.171.126.9              1024      udp   \n",
       "2          59.166.0.6         1464   149.171.126.7                53      udp   \n",
       "3          59.166.0.5         3593   149.171.126.5                53      udp   \n",
       "4          59.166.0.3        49664   149.171.126.0                53      udp   \n",
       "...               ...          ...             ...               ...      ...   \n",
       "2059410    59.166.0.1        38606   149.171.126.9                80      tcp   \n",
       "2059411    59.166.0.1        38606   149.171.126.9                80      tcp   \n",
       "2059412    59.166.0.5        33094   149.171.126.7             43433      tcp   \n",
       "2059413    59.166.0.9        35433   149.171.126.0                80      tcp   \n",
       "2059414  175.45.176.0        17293  149.171.126.17               110      tcp   \n",
       "\n",
       "              dur  sbytes  dbytes          sload         dload  spkts  dpkts  \\\n",
       "0        0.001055     132     164  500473.937500  6.218009e+05      2      2   \n",
       "1        0.036133     528     304   87676.085940  5.048017e+04      4      4   \n",
       "2        0.001119     146     178  521894.531300  6.362824e+05      2      2   \n",
       "3        0.001209     132     164  436724.562500  5.425972e+05      2      2   \n",
       "4        0.001169     146     178  499572.250000  6.090676e+05      2      2   \n",
       "...           ...     ...     ...            ...           ...    ...    ...   \n",
       "2059410  0.564998   14106  772406  198981.250000  1.091598e+07    262    526   \n",
       "2059411  0.564998   14106  772406  198981.250000  1.091598e+07    262    526   \n",
       "2059412  0.087306     320    1828   24465.671880  1.466108e+05      6      8   \n",
       "2059413  2.200934    3498  166054   12496.513670  5.983751e+05     58    116   \n",
       "2059414  0.942984     574     676    4470.913574  5.259898e+03     12     12   \n",
       "\n",
       "        attack_label  \n",
       "0             normal  \n",
       "1             normal  \n",
       "2             normal  \n",
       "3             normal  \n",
       "4             normal  \n",
       "...              ...  \n",
       "2059410       normal  \n",
       "2059411       normal  \n",
       "2059412       normal  \n",
       "2059413       normal  \n",
       "2059414     exploits  \n",
       "\n",
       "[2059415 rows x 13 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e847a694-d9e1-4b70-95d0-71feeddc39cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward packet rate = source packet count / flow duration.\n",
    "# A flow with dur == 0 would divide to inf, which most estimators reject;\n",
    "# mask those durations so the rate is NaN (missing) instead.\n",
    "flow2['Fwd Packets/s'] = flow2['spkts'] / flow2['dur'].where(flow2['dur'] != 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5152f246-0e84-4aca-b625-af91040038d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Backward packet rate = destination packet count / flow duration.\n",
    "# A flow with dur == 0 would divide to inf, which most estimators reject;\n",
    "# mask those durations so the rate is NaN (missing) instead.\n",
    "flow2['Bwd Packets/s'] = flow2['dpkts'] / flow2['dur'].where(flow2['dur'] != 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6fe46afa-81a8-44ea-b2e1-da05bfd3279b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Byte rate = bytes sent in both directions / flow duration.\n",
    "# A flow with dur == 0 would divide to inf, which most estimators reject;\n",
    "# mask those durations so the rate is NaN (missing) instead.\n",
    "flow2['Flow Bytes/s'] = (flow2['sbytes'] + flow2['dbytes']) / flow2['dur'].where(flow2['dur'] != 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "568c9322-965e-4ccf-8adc-b1120fdc60ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reduce flow2 to the same nine columns, in the same order, as flow1.\n",
    "flow2 = flow2.loc[:, [\n",
    "    'source_ip', 'source_port', 'destination_ip', 'destination_port', 'protocol',\n",
    "    'Fwd Packets/s', 'Bwd Packets/s', 'Flow Bytes/s',\n",
    "    'attack_label',\n",
    "]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "20fe5aab-0164-4c57-ac45-4625141967ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>source_ip</th>\n",
       "      <th>source_port</th>\n",
       "      <th>destination_ip</th>\n",
       "      <th>destination_port</th>\n",
       "      <th>protocol</th>\n",
       "      <th>Fwd Packets/s</th>\n",
       "      <th>Bwd Packets/s</th>\n",
       "      <th>Flow Bytes/s</th>\n",
       "      <th>attack_label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59.166.0.0</td>\n",
       "      <td>1390</td>\n",
       "      <td>149.171.126.6</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>1895.734597</td>\n",
       "      <td>1895.734597</td>\n",
       "      <td>2.805687e+05</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>59.166.0.0</td>\n",
       "      <td>33661</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>1024</td>\n",
       "      <td>udp</td>\n",
       "      <td>110.702128</td>\n",
       "      <td>110.702128</td>\n",
       "      <td>2.302604e+04</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>59.166.0.6</td>\n",
       "      <td>1464</td>\n",
       "      <td>149.171.126.7</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>1787.310098</td>\n",
       "      <td>1787.310098</td>\n",
       "      <td>2.895442e+05</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>59.166.0.5</td>\n",
       "      <td>3593</td>\n",
       "      <td>149.171.126.5</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>1654.259719</td>\n",
       "      <td>1654.259719</td>\n",
       "      <td>2.448304e+05</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59.166.0.3</td>\n",
       "      <td>49664</td>\n",
       "      <td>149.171.126.0</td>\n",
       "      <td>53</td>\n",
       "      <td>udp</td>\n",
       "      <td>1710.863986</td>\n",
       "      <td>1710.863986</td>\n",
       "      <td>2.771600e+05</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059410</th>\n",
       "      <td>59.166.0.1</td>\n",
       "      <td>38606</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>463.718456</td>\n",
       "      <td>930.976747</td>\n",
       "      <td>1.392062e+06</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059411</th>\n",
       "      <td>59.166.0.1</td>\n",
       "      <td>38606</td>\n",
       "      <td>149.171.126.9</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>463.718456</td>\n",
       "      <td>930.976747</td>\n",
       "      <td>1.392062e+06</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059412</th>\n",
       "      <td>59.166.0.5</td>\n",
       "      <td>33094</td>\n",
       "      <td>149.171.126.7</td>\n",
       "      <td>43433</td>\n",
       "      <td>tcp</td>\n",
       "      <td>68.723799</td>\n",
       "      <td>91.631732</td>\n",
       "      <td>2.460312e+04</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059413</th>\n",
       "      <td>59.166.0.9</td>\n",
       "      <td>35433</td>\n",
       "      <td>149.171.126.0</td>\n",
       "      <td>80</td>\n",
       "      <td>tcp</td>\n",
       "      <td>26.352449</td>\n",
       "      <td>52.704897</td>\n",
       "      <td>7.703639e+04</td>\n",
       "      <td>normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2059414</th>\n",
       "      <td>175.45.176.0</td>\n",
       "      <td>17293</td>\n",
       "      <td>149.171.126.17</td>\n",
       "      <td>110</td>\n",
       "      <td>tcp</td>\n",
       "      <td>12.725561</td>\n",
       "      <td>12.725561</td>\n",
       "      <td>1.325579e+03</td>\n",
       "      <td>exploits</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2059415 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            source_ip  source_port  destination_ip  destination_port protocol  \\\n",
       "0          59.166.0.0         1390   149.171.126.6                53      udp   \n",
       "1          59.166.0.0        33661   149.171.126.9              1024      udp   \n",
       "2          59.166.0.6         1464   149.171.126.7                53      udp   \n",
       "3          59.166.0.5         3593   149.171.126.5                53      udp   \n",
       "4          59.166.0.3        49664   149.171.126.0                53      udp   \n",
       "...               ...          ...             ...               ...      ...   \n",
       "2059410    59.166.0.1        38606   149.171.126.9                80      tcp   \n",
       "2059411    59.166.0.1        38606   149.171.126.9                80      tcp   \n",
       "2059412    59.166.0.5        33094   149.171.126.7             43433      tcp   \n",
       "2059413    59.166.0.9        35433   149.171.126.0                80      tcp   \n",
       "2059414  175.45.176.0        17293  149.171.126.17               110      tcp   \n",
       "\n",
       "         Fwd Packets/s  Bwd Packets/s  Flow Bytes/s attack_label  \n",
       "0          1895.734597    1895.734597  2.805687e+05       normal  \n",
       "1           110.702128     110.702128  2.302604e+04       normal  \n",
       "2          1787.310098    1787.310098  2.895442e+05       normal  \n",
       "3          1654.259719    1654.259719  2.448304e+05       normal  \n",
       "4          1710.863986    1710.863986  2.771600e+05       normal  \n",
       "...                ...            ...           ...          ...  \n",
       "2059410     463.718456     930.976747  1.392062e+06       normal  \n",
       "2059411     463.718456     930.976747  1.392062e+06       normal  \n",
       "2059412      68.723799      91.631732  2.460312e+04       normal  \n",
       "2059413      26.352449      52.704897  7.703639e+04       normal  \n",
       "2059414      12.725561      12.725561  1.325579e+03     exploits  \n",
       "\n",
       "[2059415 rows x 9 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flow2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "2f1f7d9e-f02c-4e42-8780-44f923f4c170",
   "metadata": {},
   "outputs": [],
   "source": [
    "flow1.to_csv('./cicids_flow1.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6f53ab39-5c95-4885-b152-04593b82ef09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the derived frame to its own file. './unsw_flow1.csv' is the input this\n",
    "# notebook reads the raw dur/spkts/dpkts/sbytes/dbytes columns from; saving the\n",
    "# reduced frame over it makes the next Restart & Run All fail with a KeyError.\n",
    "# NOTE(review): anything that loaded the reduced './unsw_flow1.csv' must now\n",
    "# read './unsw_flow1_rates.csv' instead.\n",
    "flow2.to_csv('./unsw_flow1_rates.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
